diff --git a/src/datamaestro_text/data/conversation/base.py b/src/datamaestro_text/data/conversation/base.py index 0046712..9019635 100644 --- a/src/datamaestro_text/data/conversation/base.py +++ b/src/datamaestro_text/data/conversation/base.py @@ -61,6 +61,13 @@ class AnswerEntry(Item): """The system answer""" +@define +class AnswerDocumentID(Item): + """An answer as a document ID""" + + document_id: str + + @define class RetrievedEntry(Item): """List of system-retrieved documents and their relevance""" @@ -69,7 +76,7 @@ class RetrievedEntry(Item): """List of retrieved documents""" document_relevances: Optional[List[str]] = None - """List of retrieved documents and their relevance status""" + """List of relevance status (optional)""" @define diff --git a/src/datamaestro_text/datasets/irds/data.py b/src/datamaestro_text/datasets/irds/data.py index a1601ea..2c9062c 100644 --- a/src/datamaestro_text/datasets/irds/data.py +++ b/src/datamaestro_text/datasets/irds/data.py @@ -399,7 +399,7 @@ def iter(self) -> Iterator[TopicRecord]: from datamaestro_text.data.conversation.base import ( ConversationTreeNode, DecontextualizedDictItem, - RetrievedEntry, + AnswerDocumentID, ConversationHistoryItem, EntryType, ) @@ -473,7 +473,7 @@ def records(self): node = node.add( ConversationTreeNode( Record( - RetrievedEntry(query.manual_canonical_result_id), + AnswerDocumentID(query.manual_canonical_result_id), EntryType.SYSTEM_ANSWER, ) )