<?xml version="1.0" encoding="UTF-8"?>
<!--
  DDI Codebook 2.5 metadata record for the dataset doi:10.7910/DVN/29374
  ("Stanford NLP Model Output for Biofuel Patent Classification").
  Only element-only content is indented; elements that carry text are kept
  on a single line so their character data is not altered by formatting.
-->
<codeBook xmlns="ddi:codebook:2_5"
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xsi:schemaLocation="ddi:codebook:2_5 https://ddialliance.org/Specification/DDI-Codebook/2.5/XMLSchema/codebook.xsd"
          version="2.5">
  <!-- Citation, distributor and version of this metadata document. -->
  <docDscr>
    <citation>
      <titlStmt>
        <titl>Stanford NLP Model Output for Biofuel Patent Classification</titl>
        <IDNo agency="DOI">doi:10.7910/DVN/29374</IDNo>
      </titlStmt>
      <distStmt>
        <distrbtr source="archive">Harvard Dataverse</distrbtr>
        <distDate>2015-03-06</distDate>
      </distStmt>
      <verStmt source="archive">
        <version date="2015-03-05" type="RELEASED">1</version>
      </verStmt>
      <biblCit>Kessler, Jeff, 2015, "Stanford NLP Model Output for Biofuel Patent Classification", https://doi.org/10.7910/DVN/29374, Harvard Dataverse, V1</biblCit>
    </citation>
  </docDscr>
  <!-- Description of the study: citation, subject/abstract/coverage, method and access terms. -->
  <stdyDscr>
    <citation>
      <titlStmt>
        <titl>Stanford NLP Model Output for Biofuel Patent Classification</titl>
        <IDNo agency="DOI">doi:10.7910/DVN/29374</IDNo>
      </titlStmt>
      <rspStmt>
        <AuthEnty affiliation="University of California, Davis">Kessler, Jeff</AuthEnty>
      </rspStmt>
      <prodStmt/>
      <distStmt>
        <distrbtr source="archive">Harvard Dataverse</distrbtr>
        <distrbtr URI="http://thedata.harvard.edu/dvn/">Harvard Dataverse Network</distrbtr>
        <contact email="jkessler@ucdavis.edu">Jeff Kessler</contact>
        <depDate>2015-03-06</depDate>
        <distDate>2015</distDate>
      </distStmt>
      <holdings URI="https://doi.org/10.7910/DVN/29374"/>
    </citation>
    <stdyInfo>
      <subject>
        <keyword vocab="NLP">Biofuel Classifier</keyword>
        <topcClas>Natural Language Processing</topcClas>
      </subject>
      <!-- The abstract text is reproduced verbatim, including its internal line break; do not re-indent it. -->
      <abstract date="2015-03">This NLP model was generated using the Stanford NLP Classifier (available from: http://nlp.stanford.edu/software/classifier.shtml).  The model was trained using a random selection of 700 manually classified biofuel patents from 1976 through 2013, and validated against 300 manually classified biofuel patents on January 03, 2014.   
Included are the classification results and associated patent numbers for both the manually trained patents, and for the automatically categorized patents.</abstract>
      <sumDscr>
        <timePrd cycle="P1" event="start" date="1976">1976</timePrd>
        <timePrd cycle="P1" event="end" date="2013">2013</timePrd>
        <geogCover>United States</geogCover>
      </sumDscr>
    </stdyInfo>
    <method>
      <dataColl>
        <sources/>
      </dataColl>
      <anlyInfo/>
    </method>
    <dataAccs>
      <setAvail/>
      <useStmt/>
      <!-- The note text is an HTML anchor whose "<" characters are escaped as entities. -->
      <notes type="DVN:TOU" level="dv">&lt;a href="http://creativecommons.org/publicdomain/zero/1.0">CC0 1.0&lt;/a></notes>
    </dataAccs>
    <othrStdyMat/>
  </stdyDscr>
  <!-- One otherMat entry per data file: label, description and a content-type note. -->
  <otherMat ID="f2544481" URI="https://dataverse.harvard.edu/api/access/datafile/2544481" level="datafile">
    <labl>Manual Classification.csv</labl>
    <txt>This is the initial list of 1000 patents manually classified for use with training and validating the NLP model</txt>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">text/plain; charset=US-ASCII</notes>
  </otherMat>
  <otherMat ID="f2544475" URI="https://dataverse.harvard.edu/api/access/datafile/2544475" level="datafile">
    <labl>ner-model.ser.gz</labl>
    <txt>This is the model generated by the Stanford NLP Classifier</txt>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">application/x-gzip</notes>
  </otherMat>
  <otherMat ID="f2544480" URI="https://dataverse.harvard.edu/api/access/datafile/2544480" level="datafile">
    <labl>NLP Classification.csv</labl>
    <txt>This is the list of patents and associated classifications based on the NLP model that was trained using the manually classified patents</txt>
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">text/plain; charset=US-ASCII</notes>
  </otherMat>
  <otherMat ID="f2544476" URI="https://dataverse.harvard.edu/api/access/datafile/2544476" level="datafile">
    <labl>patents_test.prop</labl>
    <txt>This is the property file used for parameterizing the model</txt>
    <!-- subject is kept on one line so it parses to the same value as the sibling notes above. -->
    <notes level="file" type="DATAVERSE:CONTENTTYPE" subject="Content/MIME Type">text/plain; charset=US-ASCII</notes>
  </otherMat>
</codeBook>