{"@context":{"@language":"en","@vocab":"https://schema.org/","citeAs":"cr:citeAs","column":"cr:column","conformsTo":"dct:conformsTo","cr":"http://mlcommons.org/croissant/","rai":"http://mlcommons.org/croissant/RAI/","data":{"@id":"cr:data","@type":"@json"},"dataType":{"@id":"cr:dataType","@type":"@vocab"},"dct":"http://purl.org/dc/terms/","examples":{"@id":"cr:examples","@type":"@json"},"extract":"cr:extract","field":"cr:field","fileProperty":"cr:fileProperty","fileObject":"cr:fileObject","fileSet":"cr:fileSet","format":"cr:format","includes":"cr:includes","isLiveDataset":"cr:isLiveDataset","jsonPath":"cr:jsonPath","key":"cr:key","md5":"cr:md5","parentField":"cr:parentField","path":"cr:path","recordSet":"cr:recordSet","references":"cr:references","regex":"cr:regex","repeated":"cr:repeated","replace":"cr:replace","sc":"https://schema.org/","separator":"cr:separator","source":"cr:source","subField":"cr:subField","transform":"cr:transform","wd":"https://www.wikidata.org/wiki/"},"@type":"sc:Dataset","conformsTo":"http://mlcommons.org/croissant/1.0","name":"Chinese patent in Google patent public data","url":"https://doi.org/10.7910/DVN/ZVTIP1","creator":[{"@type":"Person","givenName":"Ji","familyName":"Li","affiliation":{"@type":"Organization","name":"Xiamen University"},"name":"Li, Ji"},{"@type":"Person","givenName":"Dongbo","familyName":"Shi","affiliation":{"@type":"Organization","name":"https://ror.org/05t6hvr95"},"name":"Shi, Dongbo"}],"description":"Research-ready Chinese invention patent dataset (1985-2024) from Google Patent Public Data, including: (1) 25.4 million processed patent records in 13 relational tables, (2) GPPD-ADCP Master Key—a validated crosswalk with official CNIPA records, and (3) open-source processing scripts. 
The dataset has been systematically validated against the official CNIPA authority file, achieving a 99.96% coverage rate.","keywords":["Social Sciences","Chinese patents","Google Patent","Patent Data","Data Validation","Innovation"],"license":"http://creativecommons.org/licenses/by/4.0","datePublished":"2026-02-20","dateModified":"2026-04-13","includedInDataCatalog":{"@type":"DataCatalog","name":"Harvard Dataverse","url":"https://dataverse.harvard.edu"},"publisher":{"@type":"Organization","name":"Harvard Dataverse"},"version":"2.0","citeAs":"@data{DVN/ZVTIP1_2026,author = {Li, Ji and Shi, Dongbo},publisher = {Harvard Dataverse},title = {Chinese patent in Google patent public data},year = {2026},url = {https://doi.org/10.7910/DVN/ZVTIP1}}","citation":[{"@type":"CreativeWork","name":"Li, Ji and Shi, Dongbo, Chinese Patent in Google Patent Public Data: A Guide to Processing and Validation (February 14, 2026). Available at SSRN: https://ssrn.com/abstract=6259679 or http://dx.doi.org/10.2139/ssrn.6259679","@id":"http://dx.doi.org/10.2139/ssrn.6259679","identifier":"http://dx.doi.org/10.2139/ssrn.6259679","url":"http://dx.doi.org/10.2139/ssrn.6259679"}],"distribution":[{"@type":"cr:FileObject","@id":"00version_history.md","name":"00version_history.md","encodingFormat":"text/markdown","md5":"ad3f37d383bc738f25a691aba9262c9a","contentSize":"1020","description":"version history 
log","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13668043"},{"@type":"cr:FileObject","@id":"00_readme.pdf","name":"00_readme.pdf","encodingFormat":"application/pdf","md5":"8166177b9de7310de577cf7b0ce03f5a","contentSize":"79390","description":"","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13468537"},{"@type":"cr:FileObject","@id":"01_GPPD_CN_patent_schema.png","name":"01_GPPD_CN_patent_schema.png","encodingFormat":"image/png","md5":"960078d60eff0c701fcbc115d11b7bb0","contentSize":"574001","description":"","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13468536"},{"@type":"cr:FileObject","@id":"cn_app_pub_number.txt.zip","name":"cn_app_pub_number.txt.zip","encodingFormat":"application/zip","md5":"374b6ac4315e9a67545be06edf6730c7","contentSize":"363012935","description":"Master identifiers and family IDs.","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13464282"},{"@type":"cr:FileObject","@id":"cn_assignee.txt.zip","name":"cn_assignee.txt.zip","encodingFormat":"application/zip","md5":"7b82d93d4ff140b3053bab774e619619","contentSize":"424471019","description":"Patent assignees.","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13464283"},{"@type":"cr:FileObject","@id":"cn_backward.txt.zip","name":"cn_backward.txt.zip","encodingFormat":"application/zip","md5":"96823cfa3cc3b734b8ba4b0da5deaa26","contentSize":"499282430","description":"Backward citations.","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13464290"},{"@type":"cr:FileObject","@id":"cn_child.txt.zip","name":"cn_child.txt.zip","encodingFormat":"application/zip","md5":"75bf120c2c4bc528849fdfe039dfcac3","contentSize":"4876696","description":"Identification of parent and child 
applications.","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13464284"},{"@type":"cr:FileObject","@id":"cn_date.txt.zip","name":"cn_date.txt.zip","encodingFormat":"application/zip","md5":"84efeeb3f0ac8bd3c575a330efb5a2c4","contentSize":"407589087","description":"Application, publication, and priority dates.","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13464289"},{"@type":"cr:FileObject","@id":"cn_examiner.txt.zip","name":"cn_examiner.txt.zip","encodingFormat":"application/zip","md5":"e60e6d84ed22258bf8918b2b6e01882e","contentSize":"130723102","description":"Patent examiner names.","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13464285"},{"@type":"cr:FileObject","@id":"cn_inventor.txt.zip","name":"cn_inventor.txt.zip","encodingFormat":"application/zip","md5":"97e1b36ad910ec19a5dd7d1e2ecd5473","contentSize":"474962816","description":"Inventor names.","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13464291"},{"@type":"cr:FileObject","@id":"cn_ipc.txt.zip","name":"cn_ipc.txt.zip","encodingFormat":"application/zip","md5":"2cfa7ff5297c42caaf35aa081f64f389","contentSize":"311871272","description":"International Patent Classification (IPC) codes.","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13464287"},{"@type":"cr:FileObject","@id":"cn_ipc_v2.txt.zip","name":"cn_ipc_v2.txt.zip","encodingFormat":"application/zip","md5":"f53be4a9218398ef7e46109887ed41d6","contentSize":"450812570","description":"IPC data table (v2.0). 
Features publication numbers for global linkage and an 'is_first' flag for primary IPC codes.","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13668042"},{"@type":"cr:FileObject","@id":"cn_npl.txt.zip","name":"cn_npl.txt.zip","encodingFormat":"application/zip","md5":"e10b4101a39c3954fba570cd2c50e3ae","contentSize":"487536641","description":"Non-patent literature (NPL) references.","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13464288"},{"@type":"cr:FileObject","@id":"cn_title.txt.zip","name":"cn_title.txt.zip","encodingFormat":"application/zip","md5":"c8f2f96556552376e0477c54f3d73380","contentSize":"1488831975","description":"Patent titles.","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13464336"},{"@type":"cr:FileObject","@id":"cn_top_term.txt.zip","name":"cn_top_term.txt.zip","encodingFormat":"application/zip","md5":"e0f17dcc5b1f361bcd317837fada349b","contentSize":"1194644643","description":"NLP-extracted technical keywords.","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13464334"},{"@type":"cr:FileObject","@id":"GPPD_ADCP_Invention_MasterKey.txt.zip","name":"GPPD_ADCP_Invention_MasterKey.txt.zip","encodingFormat":"application/zip","md5":"5425cc8f32998b630da68564ec3fbd13","contentSize":"197563628","description":"GPPD-ADCP Master 
Key","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13464286"},{"@type":"cr:FileObject","@id":"cn_abstract/cn_abstract_split.zip.001","name":"cn_abstract_split.zip.001","encodingFormat":"application/x-rar","md5":"65cf6834cd26aeb91d36754ecba878f6","contentSize":"2147483648","description":"","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13468535"},{"@type":"cr:FileObject","@id":"cn_abstract/cn_abstract_split.zip.002","name":"cn_abstract_split.zip.002","encodingFormat":"application/x-rar","md5":"c78f2d7b3d21571377a7ab02ef0c5767","contentSize":"2147483648","description":"","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13468425"},{"@type":"cr:FileObject","@id":"cn_abstract/cn_abstract_split.zip.003","name":"cn_abstract_split.zip.003","encodingFormat":"application/x-rar","md5":"ba7d8f1a0386cd0ed120eed453cffd91","contentSize":"2147483648","description":"","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13468427"},{"@type":"cr:FileObject","@id":"cn_abstract/cn_abstract_split.zip.004","name":"cn_abstract_split.zip.004","encodingFormat":"application/x-rar","md5":"6d6c1f6be6467ad9927d8c8996bf22b1","contentSize":"2147483648","description":"","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13468426"},{"@type":"cr:FileObject","@id":"cn_abstract/cn_abstract_split.zip.005","name":"cn_abstract_split.zip.005","encodingFormat":"application/x-rar","md5":"1592c9fe34ce581f5fd39f347be0bc3f","contentSize":"2147483648","description":"","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13468261"},{"@type":"cr:FileObject","@id":"cn_abstract/cn_abstract_split.zip.006","name":"cn_abstract_split.zip.006","encodingFormat":"application/x-rar","md5":"032473f07505ead5dc2364d72fa5abf7","contentSize":"2147483648","description":"","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13468262"},{"@type":"cr:FileObject","@id":"cn_abstract/cn_abstract_split.zip.007","name":"cn_abstract_split.zip.0
07","encodingFormat":"application/x-rar","md5":"008ca3a430d89b0b8017690a2072d9d1","contentSize":"2147483648","description":"","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13468260"},{"@type":"cr:FileObject","@id":"cn_abstract/cn_abstract_split.zip.008","name":"cn_abstract_split.zip.008","encodingFormat":"application/x-rar","md5":"a8a799395f039787be14c39fb9034372","contentSize":"785693162","description":"","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13464360"},{"@type":"cr:FileObject","@id":"cn_embedding/cn_embedding_split.zip.001","name":"cn_embedding_split.zip.001","encodingFormat":"application/x-rar","md5":"b0ab66647cd65d1738340e529e7ef7f2","contentSize":"2147483648","description":"High-dimensional semantic vectors.","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13464348"},{"@type":"cr:FileObject","@id":"cn_embedding/cn_embedding_split.zip.002","name":"cn_embedding_split.zip.002","encodingFormat":"application/x-rar","md5":"d8a7d9012bb9d83f5980782b3db03706","contentSize":"2147483648","description":"High-dimensional semantic vectors.","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13464351"},{"@type":"cr:FileObject","@id":"cn_embedding/cn_embedding_split.zip.003","name":"cn_embedding_split.zip.003","encodingFormat":"application/x-rar","md5":"a8e1a93f423ecdd05a8096818c1f88b5","contentSize":"2147483648","description":"High-dimensional semantic vectors.","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13464349"},{"@type":"cr:FileObject","@id":"cn_embedding/cn_embedding_split.zip.004","name":"cn_embedding_split.zip.004","encodingFormat":"application/x-rar","md5":"c22ad2995f92d0e885cf33b13ee108cd","contentSize":"2077346381","description":"High-dimensional semantic 
vectors.","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13464350"},{"@type":"cr:FileObject","@id":"code/GPPD_analyse.R","name":"GPPD_analyse.R","encodingFormat":"type/x-r-syntax","md5":"145942c99fee56054b2f8fad35cd1f71","contentSize":"5293","description":"R script for generating the Master Key and validation stats.","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13468540"},{"@type":"cr:FileObject","@id":"code/process_gppd.sh","name":"process_gppd.sh","encodingFormat":"application/x-sh","md5":"16ef251afabedbbb3440db918014f639","contentSize":"4015","description":"Shell script for filtering raw data and calculating coverage.","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13468539"},{"@type":"cr:FileObject","@id":"code/researchdata_parser.py","name":"researchdata_parser.py","encodingFormat":"text/x-python-script","md5":"be28724364d3c8c2dcc6f7a1b74cb5d2","contentSize":"1917","description":"Python parser for patents.research.","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13468538"},{"@type":"cr:FileObject","@id":"Python parser for patents.publications/patent_parser.py","name":"patent_parser.py","encodingFormat":"text/x-python-script","md5":"c64bcf20ec9f3355f26f3ccd3a0db0e5","contentSize":"7138","description":"Python parser for patents.publications.","contentUrl":"https://dataverse.harvard.edu/api/access/datafile/13468541"}]}