{"id":59490,"identifier":"DVN/GGHMFT","persistentUrl":"https://doi.org/10.7910/DVN/GGHMFT","protocol":"doi","authority":"10.7910","separator":"/","publisher":"Harvard Dataverse","publicationDate":"2012-12-13","storageIdentifier":"file://1902.1/19575","datasetType":"dataset","datasetVersion":{"id":60123,"datasetId":59490,"datasetPersistentId":"doi:10.7910/DVN/GGHMFT","storageIdentifier":"file://1902.1/19575","versionNumber":2,"versionMinorNumber":0,"versionState":"RELEASED","latestVersionPublishingState":"RELEASED","deaccessionNote":"Updated files","deaccessionLink":"","distributionDate":"2012","productionDate":"2012","lastUpdateTime":"2012-12-13T08:56:16Z","releaseTime":"2012-12-12T19:00:00Z","createTime":"2012-12-13T08:56:16Z","alternativePersistentId":"hdl:1902.1/19575","publicationDate":"2012-12-13","citationDate":"2012-12-13","license":{"name":"CC0 1.0","uri":"http://creativecommons.org/publicdomain/zero/1.0","iconUri":"https://licensebuttons.net/p/zero/1.0/88x31.png","rightsIdentifier":"CC0-1.0","rightsIdentifierScheme":"SPDX","schemeUri":"https://spdx.org/licenses/","languageCode":"en"},"fileAccessRequest":false,"metadataBlocks":{"citation":{"displayName":"Citation Metadata","name":"citation","fields":[{"typeName":"title","multiple":false,"typeClass":"primitive","value":"Replication data for: Topic-partitioned multinetwork embeddings"},{"typeName":"author","multiple":true,"typeClass":"compound","value":[{"authorName":{"typeName":"authorName","multiple":false,"typeClass":"primitive","value":"Krafft, Peter"},"authorAffiliation":{"typeName":"authorAffiliation","multiple":false,"typeClass":"primitive","value":"Massachusetts Institute of Technology"}},{"authorName":{"typeName":"authorName","multiple":false,"typeClass":"primitive","value":"Moore, Juston"},"authorAffiliation":{"typeName":"authorAffiliation","multiple":false,"typeClass":"primitive","value":"University of Massachusetts Amherst"}},{"authorName":{"typeName":"authorName","multiple":false,"typeClass":"primitive","value":"Desmarais, Bruce"},"authorAffiliation":{"typeName":"authorAffiliation","multiple":false,"typeClass":"primitive","value":"University of Massachusetts Amherst"}},{"authorName":{"typeName":"authorName","multiple":false,"typeClass":"primitive","value":"Wallach, Hanna"},"authorAffiliation":{"typeName":"authorAffiliation","multiple":false,"typeClass":"primitive","value":"University of Massachusetts Amherst"}}]},{"typeName":"datasetContact","multiple":true,"typeClass":"compound","value":[{"datasetContactName":{"typeName":"datasetContactName","multiple":false,"typeClass":"primitive","value":"Bruce Desmarais"},"datasetContactAffiliation":{"typeName":"datasetContactAffiliation","multiple":false,"typeClass":"primitive","value":"University of Massachusetts Amherst"},"datasetContactEmail":{"typeName":"datasetContactEmail","multiple":false,"typeClass":"primitive","value":"desmarais@polsci.umass.edu"}}]},{"typeName":"dsDescription","multiple":true,"typeClass":"compound","value":[{"dsDescriptionValue":{"typeName":"dsDescriptionValue","multiple":false,"typeClass":"primitive","value":"We introduce a joint model of network content and context designed for exploratory analysis of email networks via visualization of topic-specific communication patterns. Our model is an admixture model for text and network attributes which uses multinomial distributions over words as mixture components for explaining text and latent Euclidean positions of actors as mixture components for explaining network attributes. We validate the appropriateness of our model by achieving state-of-the-art performance on a link prediction task and by achieving semantic coherence equivalent to that of latent Dirichlet allocation. We demonstrate the capability of our model for descriptive, explanatory, and exploratory analysis by investigating the inferred topic-specific communication patterns of a new government email dataset, the New Hanover County email corpus.   This work was supported in part by the Center for Intelligent Information Retrieval and in part by the NSF GRFP under grant #1122374. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect those of the sponsors."},"dsDescriptionDate":{"typeName":"dsDescriptionDate","multiple":false,"typeClass":"primitive","value":"2012"}}]},{"typeName":"keyword","multiple":true,"typeClass":"compound","value":[{"keywordValue":{"typeName":"keywordValue","multiple":false,"typeClass":"primitive","value":"network analysis, topic modeling, machine learning, political science, latent space"}}]},{"typeName":"topicClassification","multiple":true,"typeClass":"compound","value":[{"topicClassValue":{"typeName":"topicClassValue","multiple":false,"typeClass":"primitive","value":"network analysis, topic modeling, machine learning, political science"}}]},{"typeName":"publication","multiple":true,"typeClass":"compound","value":[{"publicationCitation":{"typeName":"publicationCitation","multiple":false,"typeClass":"primitive","value":"Peter Krafft, Juston Moore, Bruce Desmarais, Hanna Wallach. Topic-partitioned multinetwork embeddings.Advances in Neural Information Processing Systems 25. 2012."},"publicationURL":{"typeName":"publicationURL","multiple":false,"typeClass":"primitive","value":"http://books.nips.cc/papers/files/nips25/NIPS2012_1288.pdf"}}]},{"typeName":"producer","multiple":true,"typeClass":"compound","value":[{"producerName":{"typeName":"producerName","multiple":false,"typeClass":"primitive","value":"Bruce Desmarais"},"producerAffiliation":{"typeName":"producerAffiliation","multiple":false,"typeClass":"primitive","value":"University of Massachusetts Amherst"},"producerURL":{"typeName":"producerURL","multiple":false,"typeClass":"primitive","value":"http://people.umass.edu/bruced/"}}]},{"typeName":"productionDate","multiple":false,"typeClass":"primitive","value":"2012"},{"typeName":"distributor","multiple":true,"typeClass":"compound","value":[{"distributorName":{"typeName":"distributorName","multiple":false,"typeClass":"primitive","value":"Bruce Desmarais"}}]},{"typeName":"distributionDate","multiple":false,"typeClass":"primitive","value":"2012"},{"typeName":"dateOfDeposit","multiple":false,"typeClass":"primitive","value":"2012-12-13"},{"typeName":"timePeriodCovered","multiple":true,"typeClass":"compound","value":[{"timePeriodCoveredStart":{"typeName":"timePeriodCoveredStart","multiple":false,"typeClass":"primitive","value":"2011-02"},"timePeriodCoveredEnd":{"typeName":"timePeriodCoveredEnd","multiple":false,"typeClass":"primitive","value":"2011-02"}}]},{"typeName":"dateOfCollection","multiple":true,"typeClass":"compound","value":[{"dateOfCollectionStart":{"typeName":"dateOfCollectionStart","multiple":false,"typeClass":"primitive","value":"2011-03"},"dateOfCollectionEnd":{"typeName":"dateOfCollectionEnd","multiple":false,"typeClass":"primitive","value":"2011-06"}}]},{"typeName":"kindOfData","multiple":true,"typeClass":"primitive","value":["government email archive"]},{"typeName":"relatedMaterial","multiple":true,"typeClass":"primitive","value":["N/A"]},{"typeName":"relatedDatasets","multiple":true,"typeClass":"primitive","value":["N/A"]}]},"geospatial":{"displayName":"Geospatial Metadata","name":"geospatial","fields":[{"typeName":"geographicCoverage","multiple":true,"typeClass":"compound","value":[{"country":{"typeName":"country","multiple":false,"typeClass":"controlledVocabulary","value":"United States"}},{"otherGeographicCoverage":{"typeName":"otherGeographicCoverage","multiple":false,"typeClass":"primitive","value":"New Hanover County, North Carolina"}}]},{"typeName":"geographicUnit","multiple":true,"typeClass":"primitive","value":["County"]}]},"socialscience":{"displayName":"Social Science and Humanities Metadata","name":"socialscience","fields":[{"typeName":"universe","multiple":true,"typeClass":"primitive","value":["communication networks"]}]}},"files":[{"description":"- each line represents the email address of an author - the order of the authors correspond to the author index and recipient columns in edge-matrix.txt","label":"authors.txt","restricted":false,"version":1,"datasetVersionId":60123,"dataFile":{"id":2426894,"persistentId":"doi:10.7910/DVN/GGHMFT/3ZBIOB","pidURL":"https://doi.org/10.7910/DVN/GGHMFT/3ZBIOB","filename":"authors.txt","contentType":"text/plain; charset=US-ASCII","friendlyType":"Plain Text","filesize":641,"description":"- each line represents the email address of an author - the order of the authors correspond to the author index and recipient columns in edge-matrix.txt","storageIdentifier":"s3://dvn-cloud:95138","rootDataFileId":-1,"md5":"489c5376e8a0a0d1543b700bf357068c","checksum":{"type":"MD5","value":"489c5376e8a0a0d1543b700bf357068c"},"tabularData":false,"creationDate":"2012-12-13","publicationDate":"2012-12-12","fileAccessRequest":false}},{"description":"- each line represents a document - columns are separated by commas - the first column gives the name of the original document location   (this can also be an empty column) - the second column gives an index between zero and the number of   actors in the email network minus one (inclusive) indicating the   author of that email - there is one additional column for each actor in the email   network. Each column should contain either a one (indicating that   the actor is a recipient of that row's email) or a zero (indicating   that the actor is not a recipient of that row's email). The order of   these columns should correspond to the indices used to indicate the   authors of the emails. The column for the email's author should be 0.","label":"edge-matrix.csv","restricted":false,"version":1,"datasetVersionId":60123,"dataFile":{"id":2426892,"persistentId":"doi:10.7910/DVN/GGHMFT/XA1EYF","pidURL":"https://doi.org/10.7910/DVN/GGHMFT/XA1EYF","filename":"edge-matrix.csv","contentType":"text/plain; charset=US-ASCII","friendlyType":"Plain Text","filesize":197019,"description":"- each line represents a document - columns are separated by commas - the first column gives the name of the original document location   (this can also be an empty column) - the second column gives an index between zero and the number of   actors in the email network minus one (inclusive) indicating the   author of that email - there is one additional column for each actor in the email   network. Each column should contain either a one (indicating that   the actor is a recipient of that row's email) or a zero (indicating   that the actor is not a recipient of that row's email). The order of   these columns should correspond to the indices used to indicate the   authors of the emails. The column for the email's author should be 0.","storageIdentifier":"s3://dvn-cloud:95134","rootDataFileId":-1,"md5":"a69ec771e1a14dfc1c457ac6d2f44d09","checksum":{"type":"MD5","value":"a69ec771e1a14dfc1c457ac6d2f44d09"},"tabularData":false,"creationDate":"2012-12-13","publicationDate":"2012-12-12","fileAccessRequest":false}},{"description":"description of the data files","label":"README","restricted":false,"version":1,"datasetVersionId":60123,"categories":["plain text file"],"dataFile":{"id":2426890,"persistentId":"doi:10.7910/DVN/GGHMFT/T4SUPM","pidURL":"https://doi.org/10.7910/DVN/GGHMFT/T4SUPM","filename":"README","contentType":"text/plain; charset=US-ASCII","friendlyType":"Plain Text","filesize":3015,"description":"description of the data files","categories":["plain text file"],"storageIdentifier":"s3://dvn-cloud:95132","rootDataFileId":-1,"md5":"d0cd6c9116a468af253e4059e4d637ac","checksum":{"type":"MD5","value":"d0cd6c9116a468af253e4059e4d637ac"},"tabularData":false,"creationDate":"2012-12-13","publicationDate":"2012-12-12","fileAccessRequest":false}},{"description":"- each line represents a word type in the vocabulary - the order of the words must correspond to the order of the columns   in the word matrix file","label":"vocab.txt","restricted":false,"version":1,"datasetVersionId":60123,"dataFile":{"id":2426891,"persistentId":"doi:10.7910/DVN/GGHMFT/MEPLGL","pidURL":"https://doi.org/10.7910/DVN/GGHMFT/MEPLGL","filename":"vocab.txt","contentType":"text/plain; charset=US-ASCII","friendlyType":"Plain Text","filesize":50883,"description":"- each line represents a word type in the vocabulary - the order of the words must correspond to the order of the columns   in the word matrix file","storageIdentifier":"s3://dvn-cloud:95137","rootDataFileId":-1,"md5":"397046896b1bab1699be9bef418b459e","checksum":{"type":"MD5","value":"397046896b1bab1699be9bef418b459e"},"tabularData":false,"creationDate":"2012-12-13","publicationDate":"2012-12-12","fileAccessRequest":false}},{"description":"- each line represents a document - columns are separated by commas - the first column gives the name of the original document location   (this can also be an empty column) - each subsequent column should contain a nonnegative number   indicating the number of times the word type associated with that   column occurs in that document (i.e. a vector of word counts   corresponding to the word types given in the vocab folder).","label":"word-matrix.csv","restricted":false,"version":1,"datasetVersionId":60123,"dataFile":{"id":2426893,"persistentId":"doi:10.7910/DVN/GGHMFT/17LTHC","pidURL":"https://doi.org/10.7910/DVN/GGHMFT/17LTHC","filename":"word-matrix.csv","contentType":"text/plain; charset=US-ASCII","friendlyType":"Plain Text","filesize":21913012,"description":"- each line represents a document - columns are separated by commas - the first column gives the name of the original document location   (this can also be an empty column) - each subsequent column should contain a nonnegative number   indicating the number of times the word type associated with that   column occurs in that document (i.e. a vector of word counts   corresponding to the word types given in the vocab folder).","storageIdentifier":"s3://dvn-cloud:95136","rootDataFileId":-1,"md5":"574ae4f4584a5b294e2d6832cb974a15","checksum":{"type":"MD5","value":"574ae4f4584a5b294e2d6832cb974a15"},"tabularData":false,"creationDate":"2012-12-13","publicationDate":"2012-12-12","fileAccessRequest":false}}],"citation":"Krafft, Peter; Moore, Juston; Desmarais, Bruce; Wallach, Hanna, 2012, \"Replication data for: Topic-partitioned multinetwork embeddings\", https://doi.org/10.7910/DVN/GGHMFT, Harvard Dataverse, V2"}}