{"id":11033980,"identifier":"DVN/4DUBJX","persistentUrl":"https://doi.org/10.7910/DVN/4DUBJX","protocol":"doi","authority":"10.7910","separator":"/","publisher":"Harvard Dataverse","publicationDate":"2025-04-02","storageIdentifier":"s3://10.7910/DVN/4DUBJX","datasetType":"dataset","datasetVersion":{"id":441123,"datasetId":11033980,"datasetPersistentId":"doi:10.7910/DVN/4DUBJX","storageIdentifier":"s3://10.7910/DVN/4DUBJX","versionNumber":1,"versionMinorNumber":0,"versionState":"RELEASED","latestVersionPublishingState":"RELEASED","deaccessionLink":"","UNF":"UNF:6:5HKIlWTQwgwDI29MfVc/5Q==","lastUpdateTime":"2025-04-03T00:59:57Z","releaseTime":"2025-04-03T00:59:57Z","createTime":"2025-03-27T16:55:12Z","publicationDate":"2025-04-02","citationDate":"2025-04-02","license":{"name":"CC BY 4.0","uri":"http://creativecommons.org/licenses/by/4.0","iconUri":"https://licensebuttons.net/l/by/4.0/88x31.png"},"fileAccessRequest":true,"metadataBlocks":{"citation":{"displayName":"Citation Metadata","name":"citation","fields":[{"typeName":"title","multiple":false,"typeClass":"primitive","value":"ATOMICA"},{"typeName":"author","multiple":true,"typeClass":"compound","value":[{"authorName":{"typeName":"authorName","multiple":false,"typeClass":"primitive","value":"Fang, Ada"},"authorAffiliation":{"typeName":"authorAffiliation","multiple":false,"typeClass":"primitive","value":"Harvard University"},"authorIdentifierScheme":{"typeName":"authorIdentifierScheme","multiple":false,"typeClass":"controlledVocabulary","value":"ORCID"},"authorIdentifier":{"typeName":"authorIdentifier","multiple":false,"typeClass":"primitive","value":"0009-0003-7957-1905"}},{"authorName":{"typeName":"authorName","multiple":false,"typeClass":"primitive","value":"Zaixi Zhang"},"authorAffiliation":{"typeName":"authorAffiliation","multiple":false,"typeClass":"primitive","value":"Harvard University"}},{"authorName":{"typeName":"authorName","multiple":false,"typeClass":"primitive","value":"Andrew 
Zhou"},"authorAffiliation":{"typeName":"authorAffiliation","multiple":false,"typeClass":"primitive","value":"Harvard University"}},{"authorName":{"typeName":"authorName","multiple":false,"typeClass":"primitive","value":"Marinka Zitnik"},"authorAffiliation":{"typeName":"authorAffiliation","multiple":false,"typeClass":"primitive","value":"Harvard University"},"authorIdentifierScheme":{"typeName":"authorIdentifierScheme","multiple":false,"typeClass":"controlledVocabulary","value":"ORCID"},"authorIdentifier":{"typeName":"authorIdentifier","multiple":false,"typeClass":"primitive","value":"0000-0001-8530-7228"}}]},{"typeName":"datasetContact","multiple":true,"typeClass":"compound","value":[{"datasetContactName":{"typeName":"datasetContactName","multiple":false,"typeClass":"primitive","value":"Fang, Ada"},"datasetContactAffiliation":{"typeName":"datasetContactAffiliation","multiple":false,"typeClass":"primitive","value":"Harvard University"},"datasetContactEmail":{"typeName":"datasetContactEmail","multiple":false,"typeClass":"primitive","value":"ada_fang@fas.harvard.edu"}}]},{"typeName":"dsDescription","multiple":true,"typeClass":"compound","value":[{"dsDescriptionValue":{"typeName":"dsDescriptionValue","multiple":false,"typeClass":"primitive","value":"Datasets used in developing & evaluating ATOMICA."}}]},{"typeName":"subject","multiple":true,"typeClass":"controlledVocabulary","value":["Chemistry","Computer and Information Science","Medicine, Health and Life Sciences"]},{"typeName":"depositor","multiple":false,"typeClass":"primitive","value":"Fang, Ada"},{"typeName":"dateOfDeposit","multiple":false,"typeClass":"primitive","value":"2025-03-27"}]}},"files":[{"description":"30% sequence similarity split for ADP ligands on protein-small molecule 
complexes","label":"ADP_ids_sequence_30_split.tab","restricted":false,"directoryLabel":"ATOMICA-Ligand","version":4,"datasetVersionId":441123,"dataFile":{"id":11034562,"persistentId":"","filename":"ADP_ids_sequence_30_split.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":12799617,"description":"30% sequence similarity split for ADP ligands on protein-small molecule complexes","storageIdentifier":"s3://dvn-cloud:195d8e03fc8-6c68b2124cd3","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":11462074,"originalFileName":"ADP_ids_sequence_30_split.csv","UNF":"UNF:6:LDK8HUM0sSAUVE5LyLMcZQ==","rootDataFileId":-1,"md5":"795204a97b7c494b8ff83895933a0c21","checksum":{"type":"MD5","value":"795204a97b7c494b8ff83895933a0c21"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"AlphaFold3 structures and confidence scores of predicted dark proteome metal ion and small molecule complexes","label":"annotated_dark_proteome_AF3_outputs.tar.gz","restricted":false,"directoryLabel":"ATOMICA-Ligand","version":1,"datasetVersionId":441123,"dataFile":{"id":11036980,"persistentId":"","filename":"annotated_dark_proteome_AF3_outputs.tar.gz","contentType":"application/x-gzip","friendlyType":"Gzip Archive","filesize":89416322,"description":"AlphaFold3 structures and confidence scores of predicted dark proteome metal ion and small molecule complexes","storageIdentifier":"s3://dvn-cloud:195e39a1141-a64f1e1ec20d","rootDataFileId":-1,"md5":"9cce8002d06256c35922fa4b865fead3","checksum":{"type":"MD5","value":"9cce8002d06256c35922fa4b865fead3"},"tabularData":false,"creationDate":"2025-03-29","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"ATOMICANet-Ion for protein-ion 
interfaceome","label":"ATOMICANet_ion.gml","restricted":false,"directoryLabel":"ATOMICANets","version":1,"datasetVersionId":441123,"dataFile":{"id":11034594,"persistentId":"","filename":"ATOMICANet_ion.gml","contentType":"application/gml+xml","friendlyType":"application/gml+xml","filesize":4621912,"description":"ATOMICANet-Ion for protein-ion interfaceome","storageIdentifier":"s3://dvn-cloud:195d8f11b42-250ed2981887","rootDataFileId":-1,"md5":"673859abc385af17a64765ac3bbc034b","checksum":{"type":"MD5","value":"673859abc385af17a64765ac3bbc034b"},"tabularData":false,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"ATOMICANet-Lipid for protein-lipid interfaceome","label":"ATOMICANet_lipid.gml","restricted":false,"directoryLabel":"ATOMICANets","version":1,"datasetVersionId":441123,"dataFile":{"id":11034591,"persistentId":"","filename":"ATOMICANet_lipid.gml","contentType":"application/gml+xml","friendlyType":"application/gml+xml","filesize":4511346,"description":"ATOMICANet-Lipid for protein-lipid interfaceome","storageIdentifier":"s3://dvn-cloud:195d8f11d6d-2249a40de2c3","rootDataFileId":-1,"md5":"022c7d9e7ca7a944a937b0fd566998bf","checksum":{"type":"MD5","value":"022c7d9e7ca7a944a937b0fd566998bf"},"tabularData":false,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"ATOMICANet-Nucleic-Acid for protein-nucleic acid interfaceome","label":"ATOMICANet_nucleic_acid.gml","restricted":false,"directoryLabel":"ATOMICANets","version":1,"datasetVersionId":441123,"dataFile":{"id":11034593,"persistentId":"","filename":"ATOMICANet_nucleic_acid.gml","contentType":"application/gml+xml","friendlyType":"application/gml+xml","filesize":5092933,"description":"ATOMICANet-Nucleic-Acid for protein-nucleic acid 
interfaceome","storageIdentifier":"s3://dvn-cloud:195d8f1211b-0d68d1bdbeaa","rootDataFileId":-1,"md5":"908f29521ae5610aab6edea213e9670b","checksum":{"type":"MD5","value":"908f29521ae5610aab6edea213e9670b"},"tabularData":false,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"ATOMICANet-Protein for protein-protein interfaceome","label":"ATOMICANet_protein.gml","restricted":false,"directoryLabel":"ATOMICANets","version":1,"datasetVersionId":441123,"dataFile":{"id":11034590,"persistentId":"","filename":"ATOMICANet_protein.gml","contentType":"application/gml+xml","friendlyType":"application/gml+xml","filesize":14328029,"description":"ATOMICANet-Protein for protein-protein interfaceome","storageIdentifier":"s3://dvn-cloud:195d8f11fdc-116293c69569","rootDataFileId":-1,"md5":"23f5377ef396566654008d557afff8c2","checksum":{"type":"MD5","value":"23f5377ef396566654008d557afff8c2"},"tabularData":false,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"ATOMICANet-Small-Molecule for protein-small molecule interfaceome","label":"ATOMICANet_small_molecule.gml","restricted":false,"directoryLabel":"ATOMICANets","version":1,"datasetVersionId":441123,"dataFile":{"id":11034592,"persistentId":"","filename":"ATOMICANet_small_molecule.gml","contentType":"application/gml+xml","friendlyType":"application/gml+xml","filesize":4718642,"description":"ATOMICANet-Small-Molecule for protein-small molecule interfaceome","storageIdentifier":"s3://dvn-cloud:195d8f11c68-ca6b03c83f6a","rootDataFileId":-1,"md5":"7c12752d2e2597d3d5e6c6470cdb00a7","checksum":{"type":"MD5","value":"7c12752d2e2597d3d5e6c6470cdb00a7"},"tabularData":false,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"ATOMICAScore for identification of amino acid blocks involved in intermolecular 
bonds.","label":"ATOMICAScore_protein_small_molecule_results.tab","restricted":false,"directoryLabel":"analyses","version":3,"datasetVersionId":441123,"dataFile":{"id":11033981,"persistentId":"","filename":"ATOMICAScore_protein_small_molecule_results.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":436531,"description":"ATOMICAScore for identification of amino acid blocks involved in intermolecular bonds.","storageIdentifier":"s3://dvn-cloud:195d88dd710-bbdcab6cdc94","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":425587,"originalFileName":"ATOMICAScore_protein_small_molecule_results.csv","UNF":"UNF:6:CgFPlCHL6R4A7UpNxTlPQA==","rootDataFileId":-1,"md5":"76969812c7fed425b8e665ea31d885eb","checksum":{"type":"MD5","value":"76969812c7fed425b8e665ea31d885eb"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"30% sequence similarity split for ATP ligands on protein-small molecule complexes","label":"ATP_ids_sequence_30_split.tab","restricted":false,"directoryLabel":"ATOMICA-Ligand","version":4,"datasetVersionId":441123,"dataFile":{"id":11034561,"persistentId":"","filename":"ATP_ids_sequence_30_split.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":12700078,"description":"30% sequence similarity split for ATP ligands on protein-small molecule complexes","storageIdentifier":"s3://dvn-cloud:195d8e041f3-e9655b8c7c13","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":11372851,"originalFileName":"ATP_ids_sequence_30_split.csv","UNF":"UNF:6:IcRGbZgxYwKdA6tCnYag2A==","rootDataFileId":-1,"md5":"8ed28c536eba26f470c580bff2cff412","checksum":{"type":"MD5","value":"8ed28c536eba26f470c580bff2cff412"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"30% sequence 
similarity split for CA ligands on protein-ion complexes","label":"CA_ids_sequence_30_split.tab","restricted":false,"directoryLabel":"ATOMICA-Ligand","version":4,"datasetVersionId":441123,"dataFile":{"id":11034569,"persistentId":"","filename":"CA_ids_sequence_30_split.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":7521800,"description":"30% sequence similarity split for CA ligands on protein-ion complexes","storageIdentifier":"s3://dvn-cloud:195d8e04364-051ca9485706","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":6712717,"originalFileName":"CA_ids_sequence_30_split.csv","UNF":"UNF:6:KMAaUCWusRU7w419n53vkA==","rootDataFileId":-1,"md5":"dff748cdc3ca7554ed14505af331cca4","checksum":{"type":"MD5","value":"dff748cdc3ca7554ed14505af331cca4"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"30% sequence similarity split for CIT ligands on protein-small molecule complexes","label":"CIT_ids_sequence_30_split.tab","restricted":false,"directoryLabel":"ATOMICA-Ligand","version":4,"datasetVersionId":441123,"dataFile":{"id":11034575,"persistentId":"","filename":"CIT_ids_sequence_30_split.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":11764448,"description":"30% sequence similarity split for CIT ligands on protein-small molecule complexes","storageIdentifier":"s3://dvn-cloud:195d8e0454d-88e570a7e370","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":10533229,"originalFileName":"CIT_ids_sequence_30_split.csv","UNF":"UNF:6:mcfcqtIqd1YtfLuFlIQxeg==","rootDataFileId":-1,"md5":"bae3c7ef9ddc15f3fb10901c13b85e68","checksum":{"type":"MD5","value":"bae3c7ef9ddc15f3fb10901c13b85e68"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"30% sequence similarity split for CLA 
ligands on protein-small molecule complexes","label":"CLA_ids_sequence_30_split.tab","restricted":false,"directoryLabel":"ATOMICA-Ligand","version":4,"datasetVersionId":441123,"dataFile":{"id":11034579,"persistentId":"","filename":"CLA_ids_sequence_30_split.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":11821299,"description":"30% sequence similarity split for CLA ligands on protein-small molecule complexes","storageIdentifier":"s3://dvn-cloud:195d8e0474a-80f843544d44","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":10584684,"originalFileName":"CLA_ids_sequence_30_split.csv","UNF":"UNF:6:gmZqNjII8J4S9H8n0stiOg==","rootDataFileId":-1,"md5":"e6f85754b1c7b1f327718f41505fc4fd","checksum":{"type":"MD5","value":"e6f85754b1c7b1f327718f41505fc4fd"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"30% sequence similarity split for CO ligands on protein-ion complexes","label":"CO_ids_sequence_30_split.tab","restricted":false,"directoryLabel":"ATOMICA-Ligand","version":4,"datasetVersionId":441123,"dataFile":{"id":11034566,"persistentId":"","filename":"CO_ids_sequence_30_split.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":7517528,"description":"30% sequence similarity split for CO ligands on protein-ion complexes","storageIdentifier":"s3://dvn-cloud:195d8e048b5-c022849633c9","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":6708909,"originalFileName":"CO_ids_sequence_30_split.csv","UNF":"UNF:6:SWJEVMiBBW5lrmzFsOFdUQ==","rootDataFileId":-1,"md5":"b9ba82201df2f1d60afb554988dbde81","checksum":{"type":"MD5","value":"b9ba82201df2f1d60afb554988dbde81"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"CSD molecule motif similarity 
split","label":"CSD_ids.csv","restricted":false,"directoryLabel":"pretraining_data","version":1,"datasetVersionId":441123,"dataFile":{"id":11033997,"persistentId":"","filename":"CSD_ids.csv","contentType":"text/csv","friendlyType":"Comma Separated Values","filesize":194350982,"description":"CSD molecule motif similarity split","storageIdentifier":"s3://dvn-cloud:195d896e7b9-79830cda4b63","rootDataFileId":-1,"md5":"72c5d3c76eb24b031da62c4f36e4eb44","checksum":{"type":"MD5","value":"72c5d3c76eb24b031da62c4f36e4eb44"},"tabularData":false,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"CSD pre-training data","label":"CSD.jsonl.gz","restricted":false,"directoryLabel":"pretraining_data","version":1,"datasetVersionId":441123,"dataFile":{"id":11033988,"persistentId":"","filename":"CSD.jsonl.gz","contentType":"application/x-gzip","friendlyType":"Gzip Archive","filesize":2616073958,"description":"CSD pre-training data","storageIdentifier":"s3://dvn-cloud:195d896a3c4-b772dc680a06","rootDataFileId":-1,"md5":"a9c4e5c76b54217d384b18d4c98e4069","checksum":{"type":"MD5","value":"a9c4e5c76b54217d384b18d4c98e4069"},"tabularData":false,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"30% sequence similarity split for CU ligands on protein-ion complexes","label":"CU_ids_sequence_30_split.tab","restricted":false,"directoryLabel":"ATOMICA-Ligand","version":4,"datasetVersionId":441123,"dataFile":{"id":11034565,"persistentId":"","filename":"CU_ids_sequence_30_split.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":7460698,"description":"30% sequence similarity split for CU ligands on protein-ion complexes","storageIdentifier":"s3://dvn-cloud:195d8e04a11-3ac699b2368a","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated 
Values","originalFileSize":6657723,"originalFileName":"CU_ids_sequence_30_split.csv","UNF":"UNF:6:deP/TqmZcjtq5B4a+qQyQQ==","rootDataFileId":-1,"md5":"fbe7b51b2471402ceefe072c211d90ff","checksum":{"type":"MD5","value":"fbe7b51b2471402ceefe072c211d90ff"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"Predictions of ATOMICA-Ligand on dark proteome ion and small molecule binding sites","label":"dark_proteome_predictions.tab","restricted":false,"directoryLabel":"ATOMICA-Ligand","version":3,"datasetVersionId":441123,"dataFile":{"id":11034865,"persistentId":"","filename":"dark_proteome_predictions.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":168509,"description":"Predictions of ATOMICA-Ligand on dark proteome ion and small molecule binding sites","storageIdentifier":"s3://dvn-cloud:195d92a4630-37dbe913e1c3","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":156268,"originalFileName":"dark_proteome_predictions.csv","UNF":"UNF:6:esWxnh1+pVYCV86nAhVL7A==","rootDataFileId":-1,"md5":"605aef7e1a1171b95bee8004a539ad51","checksum":{"type":"MD5","value":"605aef7e1a1171b95bee8004a539ad51"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"30% sequence similarity split for FAD ligands on protein-small molecule complexes","label":"FAD_ids_sequence_30_split.tab","restricted":false,"directoryLabel":"ATOMICA-Ligand","version":4,"datasetVersionId":441123,"dataFile":{"id":11034572,"persistentId":"","filename":"FAD_ids_sequence_30_split.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":12779481,"description":"30% sequence similarity split for FAD ligands on protein-small molecule complexes","storageIdentifier":"s3://dvn-cloud:195d8e04c16-523d90a0ca37","originalFileFormat":"text/csv","originalFormatLabel":"Comma 
Separated Values","originalFileSize":11444026,"originalFileName":"FAD_ids_sequence_30_split.csv","UNF":"UNF:6:2/hy9HcUiQJgi6uaRupi0g==","rootDataFileId":-1,"md5":"835d6b3394c9c1b760ae061215bcba19","checksum":{"type":"MD5","value":"835d6b3394c9c1b760ae061215bcba19"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"30% sequence similarity split for FE ligands on protein-ion complexes","label":"FE_ids_sequence_30_split.tab","restricted":false,"directoryLabel":"ATOMICA-Ligand","version":4,"datasetVersionId":441123,"dataFile":{"id":11034574,"persistentId":"","filename":"FE_ids_sequence_30_split.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":7505505,"description":"30% sequence similarity split for FE ligands on protein-ion complexes","storageIdentifier":"s3://dvn-cloud:195d8e04d83-6242efacf158","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":6698266,"originalFileName":"FE_ids_sequence_30_split.csv","UNF":"UNF:6:UOHdwrCxuymh8p9KMmKsPQ==","rootDataFileId":-1,"md5":"f81897359a6b2aad343730493559dd46","checksum":{"type":"MD5","value":"f81897359a6b2aad343730493559dd46"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"30% sequence similarity split for GDP ligands on protein-small molecule complexes","label":"GDP_ids_sequence_30_split.tab","restricted":false,"directoryLabel":"ATOMICA-Ligand","version":4,"datasetVersionId":441123,"dataFile":{"id":11034571,"persistentId":"","filename":"GDP_ids_sequence_30_split.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":12202167,"description":"30% sequence similarity split for GDP ligands on protein-small molecule complexes","storageIdentifier":"s3://dvn-cloud:195d8e04f8c-a31b876c894d","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated 
Values","originalFileSize":10926060,"originalFileName":"GDP_ids_sequence_30_split.csv","UNF":"UNF:6:4Y4Yzf+EXHwlGlzJmA/2sQ==","rootDataFileId":-1,"md5":"b2c280d04ef71d9eae09ac53894e1fd4","checksum":{"type":"MD5","value":"b2c280d04ef71d9eae09ac53894e1fd4"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"30% sequence similarity split for GTP ligands on protein-small molecule complexes","label":"GTP_ids_sequence_30_split.tab","restricted":false,"directoryLabel":"ATOMICA-Ligand","version":4,"datasetVersionId":441123,"dataFile":{"id":11034576,"persistentId":"","filename":"GTP_ids_sequence_30_split.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":12064989,"description":"30% sequence similarity split for GTP ligands on protein-small molecule complexes","storageIdentifier":"s3://dvn-cloud:195d8e05196-a9341e930c04","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":10803030,"originalFileName":"GTP_ids_sequence_30_split.csv","UNF":"UNF:6:vbYOTn7VbUyDd9ms1DBD9g==","rootDataFileId":-1,"md5":"4a521d1a293659b8069e511336e8d25a","checksum":{"type":"MD5","value":"4a521d1a293659b8069e511336e8d25a"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"30% sequence similarity split for HEC ligands on protein-small molecule complexes","label":"HEC_ids_sequence_30_split.tab","restricted":false,"directoryLabel":"ATOMICA-Ligand","version":4,"datasetVersionId":441123,"dataFile":{"id":11034573,"persistentId":"","filename":"HEC_ids_sequence_30_split.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":10264972,"description":"30% sequence similarity split for HEC ligands on protein-small molecule complexes","storageIdentifier":"s3://dvn-cloud:195d8e0535a-2442447e460d","originalFileFormat":"text/csv","originalFormatLabel":"Comma 
Separated Values","originalFileSize":9187701,"originalFileName":"HEC_ids_sequence_30_split.csv","UNF":"UNF:6:cCegCG8MKhvVG1heMEgv2A==","rootDataFileId":-1,"md5":"86c5133614e0fd8d9dcaf4740dacc869","checksum":{"type":"MD5","value":"86c5133614e0fd8d9dcaf4740dacc869"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"30% sequence similarity split for HEM ligands on protein-small molecule complexes","label":"HEM_ids_sequence_30_split.tab","restricted":false,"directoryLabel":"ATOMICA-Ligand","version":4,"datasetVersionId":441123,"dataFile":{"id":11034563,"persistentId":"","filename":"HEM_ids_sequence_30_split.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":12764375,"description":"30% sequence similarity split for HEM ligands on protein-small molecule complexes","storageIdentifier":"s3://dvn-cloud:195d8e05569-3bed9a036361","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":11430436,"originalFileName":"HEM_ids_sequence_30_split.csv","UNF":"UNF:6:n3Xba22j9a+xMc84zCIflQ==","rootDataFileId":-1,"md5":"acedf4d5883b0d5fcdfe86446d1894c5","checksum":{"type":"MD5","value":"acedf4d5883b0d5fcdfe86446d1894c5"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"Processed AFDB Cluster representative proteins (with pLDDT > 90) which have predicted ion binding sites (with PeSTo confidence > 80)","label":"is_dark_90_plddt_PeSTo_80_ion.jsonl.gz","restricted":false,"directoryLabel":"ATOMICA-Ligand","version":1,"datasetVersionId":441123,"dataFile":{"id":11037789,"persistentId":"","filename":"is_dark_90_plddt_PeSTo_80_ion.jsonl.gz","contentType":"application/x-gzip","friendlyType":"Gzip Archive","filesize":3868970,"description":"Processed AFDB Cluster representative proteins (with pLDDT > 90) which have predicted ion binding sites (with PeSTo 
confidence > 80)","storageIdentifier":"s3://dvn-cloud:195eee02d8b-c32db26bef4f","rootDataFileId":-1,"md5":"628d0949dedadf65cf5dc1e0b4dbb417","checksum":{"type":"MD5","value":"628d0949dedadf65cf5dc1e0b4dbb417"},"tabularData":false,"creationDate":"2025-03-31","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"Processed AFDB Cluster representative proteins (with pLDDT > 90) which have predicted small molecule binding sites (with PeSTo confidence > 80)","label":"is_dark_90_plddt_PeSTo_80_small_molecule.jsonl.gz","restricted":false,"directoryLabel":"ATOMICA-Ligand","version":1,"datasetVersionId":441123,"dataFile":{"id":11037790,"persistentId":"","filename":"is_dark_90_plddt_PeSTo_80_small_molecule.jsonl.gz","contentType":"application/x-gzip","friendlyType":"Gzip Archive","filesize":1644950,"description":"Processed AFDB Cluster representative proteins (with pLDDT > 90) which have predicted small molecule binding sites (with PeSTo confidence > 80)","storageIdentifier":"s3://dvn-cloud:195eee02e50-0abaec81660c","rootDataFileId":-1,"md5":"5d1f257ff56f73926de10107f86db042","checksum":{"type":"MD5","value":"5d1f257ff56f73926de10107f86db042"},"tabularData":false,"creationDate":"2025-03-31","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"30% sequence similarity split for K ligands on protein-ion complexes","label":"K_ids_sequence_30_split.tab","restricted":false,"directoryLabel":"ATOMICA-Ligand","version":4,"datasetVersionId":441123,"dataFile":{"id":11034568,"persistentId":"","filename":"K_ids_sequence_30_split.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":7559496,"description":"30% sequence similarity split for K ligands on protein-ion complexes","storageIdentifier":"s3://dvn-cloud:195d8e056e3-ed4e18132976","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated 
Values","originalFileSize":6746465,"originalFileName":"K_ids_sequence_30_split.csv","UNF":"UNF:6:C2L2+pA4uW9XjEHmeMHSzg==","rootDataFileId":-1,"md5":"14a20585f8f3c60f9213f18e2cf50f3a","checksum":{"type":"MD5","value":"14a20585f8f3c60f9213f18e2cf50f3a"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"30% sequence similarity split for MG ligands on protein-ion complexes","label":"MG_ids_sequence_30_split.tab","restricted":false,"directoryLabel":"ATOMICA-Ligand","version":4,"datasetVersionId":441123,"dataFile":{"id":11034580,"persistentId":"","filename":"MG_ids_sequence_30_split.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":7466852,"description":"30% sequence similarity split for MG ligands on protein-ion complexes","storageIdentifier":"s3://dvn-cloud:195d8e05842-0d75e8f28396","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":6663665,"originalFileName":"MG_ids_sequence_30_split.csv","UNF":"UNF:6:57pxZCJpRCm//0MIgLroWg==","rootDataFileId":-1,"md5":"a985e3ed0d7edef0fb8f30999f74272e","checksum":{"type":"MD5","value":"a985e3ed0d7edef0fb8f30999f74272e"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"30% sequence similarity split for MN ligands on protein-ion complexes","label":"MN_ids_sequence_30_split.tab","restricted":false,"directoryLabel":"ATOMICA-Ligand","version":4,"datasetVersionId":441123,"dataFile":{"id":11034578,"persistentId":"","filename":"MN_ids_sequence_30_split.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":7580275,"description":"30% sequence similarity split for MN ligands on protein-ion complexes","storageIdentifier":"s3://dvn-cloud:195d8e059a3-ac4ba37ead2b","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated 
Values","originalFileSize":6765148,"originalFileName":"MN_ids_sequence_30_split.csv","UNF":"UNF:6:J2Iu1Tpo2vqwUCOjZIJI2A==","rootDataFileId":-1,"md5":"81cfd2ed8ef71cf7a41ab62bb6be5e9b","checksum":{"type":"MD5","value":"81cfd2ed8ef71cf7a41ab62bb6be5e9b"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"30% sequence similarity split for NAD ligands on protein-small molecule complexes","label":"NAD_ids_sequence_30_split.tab","restricted":false,"directoryLabel":"ATOMICA-Ligand","version":4,"datasetVersionId":441123,"dataFile":{"id":11034567,"persistentId":"","filename":"NAD_ids_sequence_30_split.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":11849311,"description":"30% sequence similarity split for NAD ligands on protein-small molecule complexes","storageIdentifier":"s3://dvn-cloud:195d8e05cbb-c3b20eed1ad2","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":10609260,"originalFileName":"NAD_ids_sequence_30_split.csv","UNF":"UNF:6:/qTQMj4j7+nJDNXPGMNQ8Q==","rootDataFileId":-1,"md5":"37488c0d5feea179127043e7c56e2764","checksum":{"type":"MD5","value":"37488c0d5feea179127043e7c56e2764"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"30% sequence similarity split for NAP ligands on protein-small molecule complexes","label":"NAP_ids_sequence_30_split.tab","restricted":false,"directoryLabel":"ATOMICA-Ligand","version":4,"datasetVersionId":441123,"dataFile":{"id":11034564,"persistentId":"","filename":"NAP_ids_sequence_30_split.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":11809855,"description":"30% sequence similarity split for NAP ligands on protein-small molecule complexes","storageIdentifier":"s3://dvn-cloud:195d8e05eb2-14430c2723cb","originalFileFormat":"text/csv","originalFormatLabel":"Comma 
Separated Values","originalFileSize":10573932,"originalFileName":"NAP_ids_sequence_30_split.csv","UNF":"UNF:6:doR2+I+invnVWFKu8Oal/Q==","rootDataFileId":-1,"md5":"dcc0b8b372a1de6a8c30ef887bd2ce3f","checksum":{"type":"MD5","value":"dcc0b8b372a1de6a8c30ef887bd2ce3f"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"30% sequence similarity split for NDP ligands on protein-small molecule complexes","label":"NDP_ids_sequence_30_split.tab","restricted":false,"directoryLabel":"ATOMICA-Ligand","version":4,"datasetVersionId":441123,"dataFile":{"id":11034577,"persistentId":"","filename":"NDP_ids_sequence_30_split.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":11021332,"description":"30% sequence similarity split for NDP ligands on protein-small molecule complexes","storageIdentifier":"s3://dvn-cloud:195d8e060b0-ef5141012492","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":9866517,"originalFileName":"NDP_ids_sequence_30_split.csv","UNF":"UNF:6:gYOjKlHbMP8WxSNL0O5znA==","rootDataFileId":-1,"md5":"b8af92dbd72c59824f48466d0d8d20f6","checksum":{"type":"MD5","value":"b8af92dbd72c59824f48466d0d8d20f6"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"Protein-DNA 30% sequence similarity split","label":"PDNA_ids.tab","restricted":false,"directoryLabel":"pretraining_data","version":3,"datasetVersionId":441123,"dataFile":{"id":11033982,"persistentId":"","filename":"PDNA_ids.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":109677,"description":"Protein-DNA 30% sequence similarity split","storageIdentifier":"s3://dvn-cloud:195d894e48d-f76a6f1d244e","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated 
Values","originalFileSize":98686,"originalFileName":"PDNA_ids.csv","UNF":"UNF:6:fkbEYNjsww0D7xAp+bpM4g==","rootDataFileId":-1,"md5":"e5d44ba5f5e56dd5a530c441949424fd","checksum":{"type":"MD5","value":"e5d44ba5f5e56dd5a530c441949424fd"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"Protein-DNA pre-training data","label":"PDNA.jsonl.gz","restricted":false,"directoryLabel":"pretraining_data","version":1,"datasetVersionId":441123,"dataFile":{"id":11033990,"persistentId":"","filename":"PDNA.jsonl.gz","contentType":"application/x-gzip","friendlyType":"Gzip Archive","filesize":53339541,"description":"Protein-DNA pre-training data","storageIdentifier":"s3://dvn-cloud:195d894e36a-7fe697c9539e","rootDataFileId":-1,"md5":"475ef9a0b4e66661c5019d1d29c504d1","checksum":{"type":"MD5","value":"475ef9a0b4e66661c5019d1d29c504d1"},"tabularData":false,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"Human proteome protein-ion interfaces from AlphaFold2 structures with pLDDT 70% cutoff and PeSTO 70 confidence cutoff","label":"pesto_70_plddt_70_ion.jsonl.gz","restricted":false,"directoryLabel":"ATOMICANets","version":1,"datasetVersionId":441123,"dataFile":{"id":11034000,"persistentId":"","filename":"pesto_70_plddt_70_ion.jsonl.gz","contentType":"application/x-gzip","friendlyType":"Gzip Archive","filesize":21048537,"description":"Human proteome protein-ion interfaces from AlphaFold2 structures with pLDDT 70% cutoff and PeSTO 70 confidence cutoff","storageIdentifier":"s3://dvn-cloud:195d89f96dc-693729071318","rootDataFileId":-1,"md5":"9a89dfeeba01103d18251bf3ca678977","checksum":{"type":"MD5","value":"9a89dfeeba01103d18251bf3ca678977"},"tabularData":false,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"Human proteome protein-lipid interfaces from AlphaFold2 structures with pLDDT 70% cutoff and PeSTO 70 
confidence cutoff","label":"pesto_70_plddt_70_lipid.jsonl.gz","restricted":false,"directoryLabel":"ATOMICANets","version":1,"datasetVersionId":441123,"dataFile":{"id":11034003,"persistentId":"","filename":"pesto_70_plddt_70_lipid.jsonl.gz","contentType":"application/x-gzip","friendlyType":"Gzip Archive","filesize":55285687,"description":"Human proteome protein-lipid interfaces from AlphaFold2 structures with pLDDT 70% cutoff and PeSTO 70 confidence cutoff","storageIdentifier":"s3://dvn-cloud:195d89f8a69-80c5dc815d53","rootDataFileId":-1,"md5":"7bc4b961b1d1dc57090c7c4a7db2badc","checksum":{"type":"MD5","value":"7bc4b961b1d1dc57090c7c4a7db2badc"},"tabularData":false,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"Human proteome protein-nucleic acid interfaces from AlphaFold2 structures with pLDDT 70% cutoff and PeSTO 70 confidence cutoff","label":"pesto_70_plddt_70_nucleic_acid.jsonl.gz","restricted":false,"directoryLabel":"ATOMICANets","version":1,"datasetVersionId":441123,"dataFile":{"id":11034001,"persistentId":"","filename":"pesto_70_plddt_70_nucleic_acid.jsonl.gz","contentType":"application/x-gzip","friendlyType":"Gzip Archive","filesize":41289880,"description":"Human proteome protein-nucleic acid interfaces from AlphaFold2 structures with pLDDT 70% cutoff and PeSTO 70 confidence cutoff","storageIdentifier":"s3://dvn-cloud:195d89f9274-67a71bcc0972","rootDataFileId":-1,"md5":"50ed7bce8880c9971a2de0d8fcfdf53d","checksum":{"type":"MD5","value":"50ed7bce8880c9971a2de0d8fcfdf53d"},"tabularData":false,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"Human proteome protein-protein interfaces from AlphaFold2 structures with pLDDT 70% cutoff and PeSTO 70 confidence 
cutoff","label":"pesto_70_plddt_70_protein.jsonl.gz","restricted":false,"directoryLabel":"ATOMICANets","version":1,"datasetVersionId":441123,"dataFile":{"id":11034002,"persistentId":"","filename":"pesto_70_plddt_70_protein.jsonl.gz","contentType":"application/x-gzip","friendlyType":"Gzip Archive","filesize":132879043,"description":"Human proteome protein-protein interfaces from AlphaFold2 structures with pLDDT 70% cutoff and PeSTO 70 confidence cutoff","storageIdentifier":"s3://dvn-cloud:195d89f7aa3-5f09390770e9","rootDataFileId":-1,"md5":"60ec39c9f041836a6893ebad9d7fc837","checksum":{"type":"MD5","value":"60ec39c9f041836a6893ebad9d7fc837"},"tabularData":false,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"Human proteome protein-small molecule interfaces from AlphaFold2 structures with pLDDT 70% cutoff and PeSTO 70 confidence cutoff","label":"pesto_70_plddt_70_small_molecule.jsonl.gz","restricted":false,"directoryLabel":"ATOMICANets","version":1,"datasetVersionId":441123,"dataFile":{"id":11034004,"persistentId":"","filename":"pesto_70_plddt_70_small_molecule.jsonl.gz","contentType":"application/x-gzip","friendlyType":"Gzip Archive","filesize":16785015,"description":"Human proteome protein-small molecule interfaces from AlphaFold2 structures with pLDDT 70% cutoff and PeSTO 70 confidence cutoff","storageIdentifier":"s3://dvn-cloud:195d89f7f84-4d3b9254f378","rootDataFileId":-1,"md5":"0c3d32eb8cd573ea2cc1886c232c481d","checksum":{"type":"MD5","value":"0c3d32eb8cd573ea2cc1886c232c481d"},"tabularData":false,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"Protein-ion 30% sequence similarity 
split","label":"Pion_ids.tab","restricted":false,"directoryLabel":"pretraining_data","version":3,"datasetVersionId":441123,"dataFile":{"id":11033989,"persistentId":"","filename":"Pion_ids.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":2792811,"description":"Protein-ion 30% sequence similarity split","storageIdentifier":"s3://dvn-cloud:195d894db50-f3f48edfe075","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":2494764,"originalFileName":"Pion_ids.csv","UNF":"UNF:6:RmxcW4MpC8dDKJ5YhhetaQ==","rootDataFileId":-1,"md5":"7691672fe28901f28943701e947496f5","checksum":{"type":"MD5","value":"7691672fe28901f28943701e947496f5"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"Protein-ion pre-training data","label":"Pion.jsonl.gz","restricted":false,"directoryLabel":"pretraining_data","version":1,"datasetVersionId":441123,"dataFile":{"id":11033994,"persistentId":"","filename":"Pion.jsonl.gz","contentType":"application/x-gzip","friendlyType":"Gzip Archive","filesize":105449814,"description":"Protein-ion pre-training data","storageIdentifier":"s3://dvn-cloud:195d894d91b-436edc90453f","rootDataFileId":-1,"md5":"a00c7c5c25bddf19b3ef1354983b6097","checksum":{"type":"MD5","value":"a00c7c5c25bddf19b3ef1354983b6097"},"tabularData":false,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"Protein-small molecule 30% sequence similarity split","label":"PL_ids.tab","restricted":false,"directoryLabel":"pretraining_data","version":3,"datasetVersionId":441123,"dataFile":{"id":11033996,"persistentId":"","filename":"PL_ids.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":4590011,"description":"Protein-small molecule 30% sequence similarity 
split","storageIdentifier":"s3://dvn-cloud:195d894c617-a7ed16c26c0f","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":4113952,"originalFileName":"PL_ids.csv","UNF":"UNF:6:NyV65rMg/9kna1s6w9peyA==","rootDataFileId":-1,"md5":"61fdc5e58913d7a811cc5224f9e87e4d","checksum":{"type":"MD5","value":"61fdc5e58913d7a811cc5224f9e87e4d"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"Protein-small molecule pre-training data","label":"PL.jsonl.gz","restricted":false,"directoryLabel":"pretraining_data","version":1,"datasetVersionId":441123,"dataFile":{"id":11033991,"persistentId":"","filename":"PL.jsonl.gz","contentType":"application/x-gzip","friendlyType":"Gzip Archive","filesize":502259276,"description":"Protein-small molecule pre-training data","storageIdentifier":"s3://dvn-cloud:195d894be39-5ffa9490b65b","rootDataFileId":-1,"md5":"8961288d17033882a44a0dd57b9ceece","checksum":{"type":"MD5","value":"8961288d17033882a44a0dd57b9ceece"},"tabularData":false,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"Protein-peptide 30% sequence similarity split","label":"Ppeptide_ids.tab","restricted":false,"directoryLabel":"pretraining_data","version":3,"datasetVersionId":441123,"dataFile":{"id":11033993,"persistentId":"","filename":"Ppeptide_ids.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":324427,"description":"Protein-peptide 30% sequence similarity split","storageIdentifier":"s3://dvn-cloud:195d89373a2-469b196caabc","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated 
Values","originalFileSize":290536,"originalFileName":"Ppeptide_ids.csv","UNF":"UNF:6:K34Ftjt7kdT5Z3ji6zZiWw==","rootDataFileId":-1,"md5":"b2b71040f101f69846f1352a6237f45b","checksum":{"type":"MD5","value":"b2b71040f101f69846f1352a6237f45b"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"Protein-peptide pre-training data","label":"Ppeptide.jsonl.gz","restricted":false,"directoryLabel":"pretraining_data","version":1,"datasetVersionId":441123,"dataFile":{"id":11033992,"persistentId":"","filename":"Ppeptide.jsonl.gz","contentType":"application/x-gzip","friendlyType":"Gzip Archive","filesize":57193859,"description":"Protein-peptide pre-training data","storageIdentifier":"s3://dvn-cloud:195d8936494-603d1f0d4f40","rootDataFileId":-1,"md5":"fced77c894751658e1212e2aaf536ea0","checksum":{"type":"MD5","value":"fced77c894751658e1212e2aaf536ea0"},"tabularData":false,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"Protein-protein 30% sequence similarity split","label":"PP_ids.tab","restricted":false,"directoryLabel":"pretraining_data","version":3,"datasetVersionId":441123,"dataFile":{"id":11033984,"persistentId":"","filename":"PP_ids.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":2605836,"description":"Protein-protein 30% sequence similarity split","storageIdentifier":"s3://dvn-cloud:195d8946a34-388f57b4f9d0","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":2107681,"originalFileName":"PP_ids.csv","UNF":"UNF:6:Hj9F0FLleU13zaO5vffu9g==","rootDataFileId":-1,"md5":"8bc3abed35855447b6f3a83bf9620532","checksum":{"type":"MD5","value":"8bc3abed35855447b6f3a83bf9620532"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"Protein-protein pre-training 
data","label":"PP.jsonl.gz","restricted":false,"directoryLabel":"pretraining_data","version":1,"datasetVersionId":441123,"dataFile":{"id":11033986,"persistentId":"","filename":"PP.jsonl.gz","contentType":"application/x-gzip","friendlyType":"Gzip Archive","filesize":1404346699,"description":"Protein-protein pre-training data","storageIdentifier":"s3://dvn-cloud:195d8945718-dc680845680f","rootDataFileId":-1,"md5":"baf7b80399ae0f53ffca3f634902ca95","checksum":{"type":"MD5","value":"baf7b80399ae0f53ffca3f634902ca95"},"tabularData":false,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"Protein-RNA 30% sequence similarity split","label":"PRNA_ids.tab","restricted":false,"directoryLabel":"pretraining_data","version":3,"datasetVersionId":441123,"dataFile":{"id":11033983,"persistentId":"","filename":"PRNA_ids.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":139766,"description":"Protein-RNA 30% sequence similarity split","storageIdentifier":"s3://dvn-cloud:195d8937332-a5897d8499b0","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":125731,"originalFileName":"PRNA_ids.csv","UNF":"UNF:6:Yz3HNEhVX8Z/vWiRfXNOxg==","rootDataFileId":-1,"md5":"225403678ebf70a946566cd4f83b3d36","checksum":{"type":"MD5","value":"225403678ebf70a946566cd4f83b3d36"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"Protein-RNA pre-training data","label":"PRNA.jsonl.gz","restricted":false,"directoryLabel":"pretraining_data","version":1,"datasetVersionId":441123,"dataFile":{"id":11033995,"persistentId":"","filename":"PRNA.jsonl.gz","contentType":"application/x-gzip","friendlyType":"Gzip Archive","filesize":69038123,"description":"Protein-RNA pre-training 
data","storageIdentifier":"s3://dvn-cloud:195d8937194-9944670c9353","rootDataFileId":-1,"md5":"f9ca8ae764731daaf610600f03ec5648","checksum":{"type":"MD5","value":"f9ca8ae764731daaf610600f03ec5648"},"tabularData":false,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"RNA-ligand 30% sequence similarity split","label":"RNAL_ids.tab","restricted":false,"directoryLabel":"pretraining_data","version":3,"datasetVersionId":441123,"dataFile":{"id":11033985,"persistentId":"","filename":"RNAL_ids.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":227014,"description":"RNA-ligand 30% sequence similarity split","storageIdentifier":"s3://dvn-cloud:195d893674b-0d6e1f2355d0","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":206283,"originalFileName":"RNAL_ids.csv","UNF":"UNF:6:C5VSg8nATMoeMgYItM6dqA==","rootDataFileId":-1,"md5":"2dac5a37bbadf1fd1b7d1eef8bf54d7d","checksum":{"type":"MD5","value":"2dac5a37bbadf1fd1b7d1eef8bf54d7d"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"RNA-ligand pre-training data","label":"RNAL.jsonl.gz","restricted":false,"directoryLabel":"pretraining_data","version":1,"datasetVersionId":441123,"dataFile":{"id":11033987,"persistentId":"","filename":"RNAL.jsonl.gz","contentType":"application/x-gzip","friendlyType":"Gzip Archive","filesize":7089739,"description":"RNA-ligand pre-training data","storageIdentifier":"s3://dvn-cloud:195d89366d7-23e03b9c76c4","rootDataFileId":-1,"md5":"be075b567fe6d939ff1996efc85a3027","checksum":{"type":"MD5","value":"be075b567fe6d939ff1996efc85a3027"},"tabularData":false,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}},{"description":"30% sequence similarity split for ZN ligands on protein-ion 
complexes","label":"ZN_ids_sequence_30_split.tab","restricted":false,"directoryLabel":"ATOMICA-Ligand","version":4,"datasetVersionId":441123,"dataFile":{"id":11034570,"persistentId":"","filename":"ZN_ids_sequence_30_split.tab","contentType":"text/tab-separated-values","friendlyType":"Tab-Delimited","filesize":7542880,"description":"30% sequence similarity split for ZN ligands on protein-ion complexes","storageIdentifier":"s3://dvn-cloud:195d8e06215-de29cbf9f5bf","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":6731665,"originalFileName":"ZN_ids_sequence_30_split.csv","UNF":"UNF:6:vM5BKDQvTbtaVAgkE7/Hpw==","rootDataFileId":-1,"md5":"6a1d7f693efeb4db79ffb5a0aec2fb23","checksum":{"type":"MD5","value":"6a1d7f693efeb4db79ffb5a0aec2fb23"},"tabularData":true,"creationDate":"2025-03-27","publicationDate":"2025-04-02","fileAccessRequest":true}}],"citation":"Fang, Ada; Zaixi Zhang; Andrew Zhou; Marinka Zitnik, 2025, \"ATOMICA\", https://doi.org/10.7910/DVN/4DUBJX, Harvard Dataverse, V1, UNF:6:5HKIlWTQwgwDI29MfVc/5Q== [fileUNF]"}}