<?xml version="1.0" encoding="UTF-8"?>
<!--
  DDI Codebook 2.5 metadata record for the dataset doi:10.7910/DVN/ZVTIP1
  ("Chinese patent in Google patent public data"), distributed by Harvard Dataverse.

  Layout: docDscr (citation of this record), stdyDscr (study-level metadata),
  then one otherMat element per deposited file.

  Formatting note: only element-only content is indented. Elements that carry text
  are kept on their original lines, because any added whitespace there would become data.
-->
<codeBook xmlns="ddi:codebook:2_5"
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xsi:schemaLocation="ddi:codebook:2_5 https://ddialliance.org/Specification/DDI-Codebook/2.5/XMLSchema/codebook.xsd"
          version="2.5">

  <!-- Citation of the metadata record itself: identifier, distributor and released version. -->
  <docDscr>
    <citation>
      <titlStmt>
        <titl>Chinese patent in Google patent public data</titl>
        <IDNo agency="DOI">doi:10.7910/DVN/ZVTIP1</IDNo>
      </titlStmt>
      <distStmt>
        <distrbtr source="archive">Harvard Dataverse</distrbtr>
        <distDate>2026-02-20</distDate>
      </distStmt>
      <verStmt source="archive">
        <version date="2026-04-13" type="RELEASED">2</version>
      </verStmt>
      <biblCit>Li, Ji; Shi, Dongbo, 2026, "Chinese patent in Google patent public data", https://doi.org/10.7910/DVN/ZVTIP1, Harvard Dataverse, V2</biblCit>
    </citation>
  </docDscr>

  <!-- Study-level metadata: authorship, deposit details, subject, abstract, terms of use, related publication. -->
  <stdyDscr>
    <citation>
      <titlStmt>
        <titl>Chinese patent in Google patent public data</titl>
        <IDNo agency="DOI">doi:10.7910/DVN/ZVTIP1</IDNo>
      </titlStmt>
      <rspStmt>
        <!-- The two affiliation values use different forms: a plain name and a ROR URL. -->
        <AuthEnty affiliation="Xiamen University">Li, Ji</AuthEnty>
        <AuthEnty affiliation="https://ror.org/05t6hvr95">Shi, Dongbo</AuthEnty>
      </rspStmt>
      <prodStmt/>
      <distStmt>
        <distrbtr source="archive">Harvard Dataverse</distrbtr>
        <contact email="skyeliji@gmail.com">Li, Ji</contact>
        <depositr>Li, Ji</depositr>
        <depDate>2026-02-14</depDate>
      </distStmt>
      <holdings URI="https://doi.org/10.7910/DVN/ZVTIP1"/>
    </citation>
    <stdyInfo>
      <subject>
        <keyword xml:lang="en">Social Sciences</keyword>
        <keyword>Chinese patents</keyword>
        <keyword>Google Patent</keyword>
        <keyword>Patent Data</keyword>
        <keyword>Data Validation</keyword>
        <keyword>Innovation</keyword>
      </subject>
      <!-- The line break inside the abstract is part of the text; do not re-indent it. -->
      <abstract date="2026-02-14">Research-ready Chinese invention patent dataset (1985-2024) from Google Patent Public Data, including: (1) 25.4 million processed patent records in 13 relational tables, (2) GPPD-ADCP Master Key—a validated crosswalk with official CNIPA records, and (3) open-source processing scripts. 
The dataset has been systematically validated against the official CNIPA authority file, achieving 99.96% coverage rate.</abstract>
      <sumDscr/>
    </stdyInfo>
    <method>
      <dataColl>
        <sources/>
      </dataColl>
      <anlyInfo/>
    </method>
    <dataAccs>
      <setAvail/>
      <useStmt/>
      <!-- Terms of use, stored as an escaped HTML anchor rather than as child markup. -->
      <notes type="DVN:TOU" level="dv">&lt;a href="http://creativecommons.org/licenses/by/4.0">CC BY 4.0&lt;/a></notes>
    </dataAccs>
    <othrStdyMat>
      <relPubl>
        <citation>
          <titlStmt>
            <titl>Li, Ji and Shi, Dongbo, Chinese Patent in Google Patent Public Data: A Guide to Processing and Validation (February 14, 2026). Available at SSRN: https://ssrn.com/abstract=6259679 or http://dx.doi.org/10.2139/ssrn.6259679</titl>
            <!-- agency names the identifier scheme, so the content must be the DOI itself;
                 the value matches the DOI given in the citation text and in ExtLink. -->
            <IDNo agency="doi">10.2139/ssrn.6259679</IDNo>
          </titlStmt>
          <biblCit>Li, Ji and Shi, Dongbo, Chinese Patent in Google Patent Public Data: A Guide to Processing and Validation (February 14, 2026). Available at SSRN: https://ssrn.com/abstract=6259679 or http://dx.doi.org/10.2139/ssrn.6259679</biblCit>
        </citation>
        <ExtLink URI="http://dx.doi.org/10.2139/ssrn.6259679"/>
      </relPubl>
    </othrStdyMat>
  </stdyDscr>

  <!-- File inventory. Each otherMat gives the file name (labl), an optional description (txt)
       and the recorded content type (notes). -->

  <!-- Documentation files -->
  <otherMat ID="f13468537" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13468537">
    <labl>00_readme.pdf</labl>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/pdf</notes>
  </otherMat>
  <otherMat ID="f13668043" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13668043">
    <labl>00version_history.md</labl>
    <txt>version history log</txt>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">text/markdown</notes>
  </otherMat>
  <otherMat ID="f13468536" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13468536">
    <labl>01_GPPD_CN_patent_schema.png</labl>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">image/png</notes>
  </otherMat>

  <!-- cn_abstract_split: eight numbered parts (.001 to .008).
       NOTE(review): the parts are named .zip.NNN but recorded as application/x-rar; confirm the actual archive format. -->
  <otherMat ID="f13468535" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13468535">
    <labl>cn_abstract_split.zip.001</labl>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/x-rar</notes>
  </otherMat>
  <otherMat ID="f13468425" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13468425">
    <labl>cn_abstract_split.zip.002</labl>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/x-rar</notes>
  </otherMat>
  <otherMat ID="f13468427" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13468427">
    <labl>cn_abstract_split.zip.003</labl>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/x-rar</notes>
  </otherMat>
  <otherMat ID="f13468426" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13468426">
    <labl>cn_abstract_split.zip.004</labl>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/x-rar</notes>
  </otherMat>
  <otherMat ID="f13468261" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13468261">
    <labl>cn_abstract_split.zip.005</labl>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/x-rar</notes>
  </otherMat>
  <otherMat ID="f13468262" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13468262">
    <labl>cn_abstract_split.zip.006</labl>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/x-rar</notes>
  </otherMat>
  <otherMat ID="f13468260" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13468260">
    <labl>cn_abstract_split.zip.007</labl>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/x-rar</notes>
  </otherMat>
  <otherMat ID="f13464360" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13464360">
    <labl>cn_abstract_split.zip.008</labl>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/x-rar</notes>
  </otherMat>

  <!-- Zipped data tables -->
  <otherMat ID="f13464282" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13464282">
    <labl>cn_app_pub_number.txt.zip</labl>
    <txt>Master identifiers and family IDs.</txt>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/zip</notes>
  </otherMat>
  <otherMat ID="f13464283" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13464283">
    <labl>cn_assignee.txt.zip</labl>
    <txt>Patent assignees.</txt>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/zip</notes>
  </otherMat>
  <otherMat ID="f13464290" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13464290">
    <labl>cn_backward.txt.zip</labl>
    <txt>Backward citations.</txt>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/zip</notes>
  </otherMat>
  <otherMat ID="f13464284" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13464284">
    <labl>cn_child.txt.zip</labl>
    <txt>Identification of parent and child applications.</txt>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/zip</notes>
  </otherMat>
  <otherMat ID="f13464289" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13464289">
    <labl>cn_date.txt.zip</labl>
    <txt>Application, publication, and priority dates.</txt>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/zip</notes>
  </otherMat>

  <!-- cn_embedding_split: four numbered parts (.001 to .004), recorded as application/x-rar like the abstract parts. -->
  <otherMat ID="f13464348" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13464348">
    <labl>cn_embedding_split.zip.001</labl>
    <txt>High-dimensional semantic vectors.</txt>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/x-rar</notes>
  </otherMat>
  <otherMat ID="f13464351" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13464351">
    <labl>cn_embedding_split.zip.002</labl>
    <txt>High-dimensional semantic vectors.</txt>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/x-rar</notes>
  </otherMat>
  <otherMat ID="f13464349" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13464349">
    <labl>cn_embedding_split.zip.003</labl>
    <txt>High-dimensional semantic vectors.</txt>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/x-rar</notes>
  </otherMat>
  <otherMat ID="f13464350" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13464350">
    <labl>cn_embedding_split.zip.004</labl>
    <txt>High-dimensional semantic vectors.</txt>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/x-rar</notes>
  </otherMat>

  <otherMat ID="f13464285" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13464285">
    <labl>cn_examiner.txt.zip</labl>
    <txt>Patent examiner names.</txt>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/zip</notes>
  </otherMat>
  <otherMat ID="f13464291" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13464291">
    <labl>cn_inventor.txt.zip</labl>
    <txt>Inventor names.</txt>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/zip</notes>
  </otherMat>
  <otherMat ID="f13464287" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13464287">
    <labl>cn_ipc.txt.zip</labl>
    <txt>International Patent Classification (IPC) codes.</txt>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/zip</notes>
  </otherMat>
  <!-- The line break inside this txt is part of the description text; do not re-indent it. -->
  <otherMat ID="f13668042" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13668042">
    <labl>cn_ipc_v2.txt.zip</labl>
    <txt>IPC data table (v2.0). 
Features publication numbers for global linkage and an 'is_first' flag for primary IPC codes.</txt>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/zip</notes>
  </otherMat>
  <otherMat ID="f13464288" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13464288">
    <labl>cn_npl.txt.zip</labl>
    <txt>Non patent literature (NPL) references.</txt>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/zip</notes>
  </otherMat>
  <otherMat ID="f13464336" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13464336">
    <labl>cn_title.txt.zip</labl>
    <txt>Patent titles.</txt>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/zip</notes>
  </otherMat>
  <otherMat ID="f13464334" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13464334">
    <labl>cn_top_term.txt.zip</labl>
    <txt>NLP-extracted technical keywords.</txt>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/zip</notes>
  </otherMat>
  <otherMat ID="f13464286" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13464286">
    <labl>GPPD_ADCP_Invention_MasterKey.txt.zip</labl>
    <txt>GPPD-ADCP Master Key</txt>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/zip</notes>
  </otherMat>

  <!-- Processing scripts -->
  <otherMat ID="f13468540" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13468540">
    <labl>GPPD_analyse.R</labl>
    <txt>R script for generating the Master Key and validation stats.</txt>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">type/x-r-syntax</notes>
  </otherMat>
  <otherMat ID="f13468541" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13468541">
    <labl>patent_parser.py</labl>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">text/x-python-script</notes>
  </otherMat>
  <otherMat ID="f13468539" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13468539">
    <labl>process_gppd.sh</labl>
    <txt>Shell script for filtering raw data and calculating coverage.</txt>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/x-sh</notes>
  </otherMat>
  <otherMat ID="f13468538" level="datafile"
            URI="https://dataverse.harvard.edu/api/access/datafile/13468538">
    <labl>researchdata_parser.py</labl>
    <txt>Python parser for patents.research.</txt>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">text/x-python-script</notes>
  </otherMat>
</codeBook>