<?xml version="1.0" encoding="UTF-8"?>
<resource xmlns="http://datacite.org/schema/kernel-4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.5/metadata.xsd">
  <identifier identifierType="DOI">10.7910/DVN/OV2WAM</identifier>
  <creators>
    <creator>
      <creatorName nameType="Personal">Milani, Alfredo</creatorName>
      <givenName>Alfredo</givenName>
      <familyName>Milani</familyName>
      <nameIdentifier nameIdentifierScheme="ORCID" schemeURI="https://orcid.org">https://orcid.org/0000-0003-4534-1805</nameIdentifier>
      <affiliation affiliationIdentifier="https://ror.org/035mh1293" schemeURI="https://ror.org" affiliationIdentifierScheme="ROR">Link Campus University</affiliation>
    </creator>
  </creators>
  <titles>
    <title>Vulnerability of LLMs in Educational Assessment</title>
  </titles>
  <publisher>Harvard Dataverse</publisher>
  <publicationYear>2025</publicationYear>
  <subjects>
    <subject>Computer and Information Science</subject>
    <subject>Social Sciences</subject>
    <subject>Large Language Models</subject>
    <subject>Generative AI</subject>
    <subject>Prompt Injection</subject>
    <subject>Education Sciences</subject>
    <subject>Education Evaluation</subject>
    <subject>Trustworthy AI</subject>
    <subject>Human-in-the-Loop AI</subject>
  </subjects>
  <contributors>
    <contributor contributorType="ContactPerson">
      <contributorName nameType="Personal">Milani, Alfredo</contributorName>
      <givenName>Alfredo</givenName>
      <familyName>Milani</familyName>
      <affiliation>Link Campus University, Rome, Italy</affiliation>
    </contributor>
    <contributor contributorType="ContactPerson">
      <contributorName nameType="Personal">Franzoni, Valentina</contributorName>
      <givenName>Valentina</givenName>
      <familyName>Franzoni</familyName>
      <affiliation>University of Perugia, Italy</affiliation>
    </contributor>
    <contributor contributorType="ContactPerson">
      <contributorName nameType="Personal">Florindi, Emanuele</contributorName>
      <affiliation>University of Modena-Reggio Emilia</affiliation>
    </contributor>
  </contributors>
  <dates>
    <date dateType="Submitted">2025-09-12</date>
    <date dateType="Available">2025-09-12</date>
  </dates>
  <resourceType resourceTypeGeneral="Dataset"/>
  <relatedIdentifiers>
    <relatedIdentifier relationType="IsSupplementTo" relatedIdentifierType="ISSN">2227-7102</relatedIdentifier>
  </relatedIdentifiers>
  <sizes>
    <size>4804924</size>
  </sizes>
  <formats>
    <format>application/zip</format>
  </formats>
  <version>1.0</version>
  <rightsList>
    <rights rightsURI="info:eu-repo/semantics/openAccess"/>
    <rights rightsURI="http://creativecommons.org/publicdomain/zero/1.0" rightsIdentifier="CC0-1.0" rightsIdentifierScheme="SPDX" schemeURI="https://spdx.org/licenses/" xml:lang="en">Creative Commons CC0 1.0 Universal Public Domain Dedication.</rights>
  </rightsList>
  <descriptions>
    <description descriptionType="Abstract">The dataset contains the output of experiments on a research project on 
Vulnerability of LLMs in Educational Assessment.

The Dataset contains:
-the students' assignment data in normal form and in the injected form
-the output produced by the tested LLMs: ChatGPT, Gemini, DeepSeek, Grok, Perplexity and Copilot for the experiments evaluating the assignments, both as a single document and collectively as a group of documents, denominated:
 
-User Legitimate LLMs Prompts
-Normal (no injection) providing the reference base evaluation
 -Prompt Injection Pass, one type of injection experiment, called Fail-To-Top, to move an assignment evaluated FAIL by the reference base evaluation to PASS, i.e. above 35% of total points.
 -Prompt Injection to Top25, a type of injection experiment to move to the top 25% an assignment with a lower reference base evaluation. This latter type of experiment comes in 3 versions, Fail-To-Top, Sat-To-Top, Good-To-Top, where assignments with reference base evaluation respectively Fail (below 35%), Satisfactory (greater than 25% and below 50%) and Good (above 50% and below 75%) are considered for injection.

The names of the folders and output result files are accordingly self-explanatory.</description>
  </descriptions>
</resource>
