{"id":10117852,"identifier":"DVN/PRHQMK","persistentUrl":"https://doi.org/10.7910/DVN/PRHQMK","protocol":"doi","authority":"10.7910","separator":"/","publisher":"Harvard Dataverse","publicationDate":"2024-04-14","storageIdentifier":"file://10.7910/DVN/PRHQMK","datasetType":"dataset","datasetVersion":{"id":415335,"datasetId":10117852,"datasetPersistentId":"doi:10.7910/DVN/PRHQMK","storageIdentifier":"file://10.7910/DVN/PRHQMK","versionNumber":1,"versionMinorNumber":0,"versionState":"RELEASED","latestVersionPublishingState":"RELEASED","deaccessionLink":"","lastUpdateTime":"2024-04-14T18:49:15Z","releaseTime":"2024-04-14T18:49:15Z","createTime":"2024-04-14T18:49:04Z","publicationDate":"2024-04-14","citationDate":"2024-04-14","license":{"name":"CC0 1.0","uri":"http://creativecommons.org/publicdomain/zero/1.0","iconUri":"https://licensebuttons.net/p/zero/1.0/88x31.png","rightsIdentifier":"CC0-1.0","rightsIdentifierScheme":"SPDX","schemeUri":"https://spdx.org/licenses/","languageCode":"en"},"fileAccessRequest":true,"metadataBlocks":{"citation":{"displayName":"Citation Metadata","name":"citation","fields":[{"typeName":"title","multiple":false,"typeClass":"primitive","value":"Image-Guided Object Detection using OWL-ViT and Enhanced Query Embedding Extraction"},{"typeName":"author","multiple":true,"typeClass":"compound","value":[{"authorName":{"typeName":"authorName","multiple":false,"typeClass":"primitive","value":"Melih Serin"},"authorAffiliation":{"typeName":"authorAffiliation","multiple":false,"typeClass":"primitive","value":"Boğaziçi University"}}]},{"typeName":"datasetContact","multiple":true,"typeClass":"compound","value":[{"datasetContactName":{"typeName":"datasetContactName","multiple":false,"typeClass":"primitive","value":"Melih Serin"},"datasetContactAffiliation":{"typeName":"datasetContactAffiliation","multiple":false,"typeClass":"primitive","value":"Boğaziçi 
University"},"datasetContactEmail":{"typeName":"datasetContactEmail","multiple":false,"typeClass":"primitive","value":"melihsrnn@gmail.com"}}]},{"typeName":"dsDescription","multiple":true,"typeClass":"compound","value":[{"dsDescriptionValue":{"typeName":"dsDescriptionValue","multiple":false,"typeClass":"primitive","value":"Computer vision has been receiving increasing attention with the recent complex Generative AI models released by tech industry giants, such as OpenAI and Google. However, there is a specific subfield that we wanted to focus on, that is, Image-Guided Object Detection. A detailed literature survey directed us towards a successful study called Simple Open-Vocabulary Object Detection with Vision Transformers (OWL-ViT) [1], which is a multifunctional complex model that can also perform image-guided object detection as a side function. In this study, we conducted experiments using the OWL-ViT architecture as the base model and modified the necessary parts to achieve better one-shot performance. 
Code and models are available on GitHub."},"dsDescriptionDate":{"typeName":"dsDescriptionDate","multiple":false,"typeClass":"primitive","value":"2024-04-15"}}]},{"typeName":"subject","multiple":true,"typeClass":"controlledVocabulary","value":["Engineering"]},{"typeName":"keyword","multiple":true,"typeClass":"compound","value":[{"keywordValue":{"typeName":"keywordValue","multiple":false,"typeClass":"primitive","value":"Open-Vocabulary Object Detection with Vision Transformers (OWL-ViT)"}},{"keywordValue":{"typeName":"keywordValue","multiple":false,"typeClass":"primitive","value":"Object Detection"}},{"keywordValue":{"typeName":"keywordValue","multiple":false,"typeClass":"primitive","value":"Vision Transformers"}},{"keywordValue":{"typeName":"keywordValue","multiple":false,"typeClass":"primitive","value":"End-to-End Training"}},{"keywordValue":{"typeName":"keywordValue","multiple":false,"typeClass":"primitive","value":"Generalized Intersection over Union (gIoU) Loss"}}]},{"typeName":"publication","multiple":true,"typeClass":"compound","value":[{"publicationIDNumber":{"typeName":"publicationIDNumber","multiple":false,"typeClass":"primitive","value":"10.5281/zenodo.10938342"}}]},{"typeName":"depositor","multiple":false,"typeClass":"primitive","value":"KUUJE"},{"typeName":"dateOfDeposit","multiple":false,"typeClass":"primitive","value":"2024-04-14"}]}},"files":[{"label":"ImageGuidedObjectDetection.pdf","restricted":false,"version":1,"datasetVersionId":415335,"dataFile":{"id":10117853,"persistentId":"","filename":"ImageGuidedObjectDetection.pdf","contentType":"application/pdf","friendlyType":"Adobe PDF","filesize":4173342,"storageIdentifier":"s3://dvn-cloud:18edeccb62a-f3945d289a53","rootDataFileId":-1,"md5":"3d43488c06f591c6c2a832dc82181d05","checksum":{"type":"MD5","value":"3d43488c06f591c6c2a832dc82181d05"},"tabularData":false,"creationDate":"2024-04-14","publicationDate":"2024-04-14","fileAccessRequest":true}}],"citation":"Melih Serin, 2024, \"Image-Guided Object 
Detection using OWL-ViT and Enhanced Query Embedding Extraction\", https://doi.org/10.7910/DVN/PRHQMK, Harvard Dataverse, V1"}}