Skip to content

Commit

Permalink
#348 merge existing and new annotation fields
Browse files: browse the repository at this point in the history
  • Loading branch information
twinkarma committed May 26, 2023
1 parent 416083c commit 101acd2
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 26 deletions.
21 changes: 12 additions & 9 deletions backend/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -969,28 +969,31 @@ def get_doc_annotation_dict(self, json_format="raw", anonymize=True):
# Create dictionary for document
doc_dict = None
if json_format == "raw" or json_format == "csv":
doc_dict = self.data
doc_dict = dict(self.data)
elif json_format == "gate":
# GATE json format are expected to have an existing "features" field
features_dict = self.data["features"] if "features" in self.data and isinstance(self.data["features"], dict) else {}
features_dict = dict(self.data["features"]) if "features" in self.data and isinstance(self.data["features"], dict) else {}

# Add any non-compliant top-level fields into the "features" field instead
ignore_keys = {"text", "features", self.project.document_id_field}
ignore_keys = {"text", "features", "offset_type", "annotation_sets", self.project.document_id_field}
features_dict.update({key: value for key, value in self.data.items() if key not in ignore_keys})

doc_dict = {
"text": self.data["text"],
"features": features_dict,
"offset_type": "p",
"offset_type": self.data["offset_type"] if "offset_type" in self.data else "p", # Use original offset type
"name": get_value_from_key_path(self.data, self.project.document_id_field)
}




# Insert annotation sets into the doc dict
annotations = self.annotations.filter(status=Annotation.COMPLETED)
if json_format == "csv":
# Gets pre-existing annotations
annotation_sets = dict(self.data["annotations"]) if "annotations" in self.data else {}
# Format annotations for CSV export
annotation_sets = {}
for annotation in annotations:
a_data = annotation.data
annotation_dict = {}
Expand All @@ -1010,8 +1013,9 @@ def get_doc_annotation_dict(self, json_format="raw", anonymize=True):
doc_dict["annotations"] = annotation_sets

else:
# Gets pre-existing annotations
annotation_sets = dict(self.data["annotation_sets"]) if "annotation_sets" in self.data else {}
# Format for JSON in line with GATE formatting
annotation_sets = {}
for annotation in annotations:
a_data = annotation.data
anonymized_name = f"{settings.ANONYMIZATION_PREFIX}{annotation.user.id}"
Expand All @@ -1024,14 +1028,13 @@ def get_doc_annotation_dict(self, json_format="raw", anonymize=True):
"end": 0,
"id": 0,
"duration_seconds": annotation.time_to_complete,
"features": {
"label": a_data
}
"features": a_data
}
],
"next_annid": 1,
}
annotation_sets[anonymized_name if anonymize else annotation.user.username] = annotation_set

doc_dict["annotation_sets"] = annotation_sets

return doc_dict
Expand Down
103 changes: 86 additions & 17 deletions backend/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1115,8 +1115,52 @@ def setUp(self):
"gate_format_feature1": "Gate feature test value",
"gate_format_feature2": "Gate feature test value",
"gate_format_feature3": "Gate feature test value",
},
"offset_type": "x",
"annotations": {
"existing_annotator1": {
"sentiment": "positive"
},
f"2": {
"sentiment": "positive"
}

},
"annotation_sets": {
"existing_annotator1": {
"name": "existing_annotator1",
"annotations": [
{
"type": "Document",
"start": 0,
"end": 10,
"id": 0,
"features": {
"sentiment": "positive"
}
}
],
"next_annid": 1
},
f"{settings.ANONYMIZATION_PREFIX}2": {
"name": f"{settings.ANONYMIZATION_PREFIX}1",
"annotations": [
{
"type": "Document",
"start": 0,
"end": 10,
"id": 0,
"features": {
"sentiment": "positive"
}
}
],
"next_annid": 1
}

}


}
)

Expand Down Expand Up @@ -1161,11 +1205,14 @@ def test_export_raw(self):
self.assertTrue("feature2" in doc_dict)
self.assertTrue("feature3" in doc_dict)
self.assertTrue("features" in doc_dict)
self.assertTrue("offset_type" in doc_dict)
self.assertTrue("annotations" in doc_dict)
doc_features = doc_dict["features"]
self.assertTrue("gate_format_feature1" in doc_features)
self.assertTrue("gate_format_feature2" in doc_features)
self.assertTrue("gate_format_feature3" in doc_features)


self.check_raw_gate_annotation_formatting(doc_dict)

def test_export_gate(self):
Expand All @@ -1178,37 +1225,56 @@ def test_export_gate(self):

self.assertTrue("text" in doc_dict)
self.assertTrue("features" in doc_dict)
self.assertFalse("annotations" in doc_dict)
self.assertEqual(doc_dict["offset_type"], "x")
doc_features = doc_dict["features"]
self.assertTrue("id" in doc_features)
self.assertTrue("feature1" in doc_features)
self.assertTrue("feature2" in doc_features)
self.assertTrue("feature3" in doc_features)
self.assertTrue("annotations" in doc_features)
self.assertFalse("features" in doc_features, "Double nesting of features field")
self.assertFalse("offset_type" in doc_features, "Double nesting of offset_type field")
self.assertTrue("gate_format_feature1" in doc_features)
self.assertTrue("gate_format_feature2" in doc_features)
self.assertTrue("gate_format_feature3" in doc_features)

self.check_raw_gate_annotation_formatting(doc_dict)

def test_export_gate_with_no_offset_type(self):
    """GATE export reports offset_type "p" when the document data has none."""
    documents = self.project.documents.all()
    for document in documents:
        # Drop the stored offset type so the export must supply a value itself
        del document.data["offset_type"]

        exported = document.get_doc_annotation_dict("gate")
        self.assertEqual(exported["offset_type"], "p", "offset_type should default to p")


def check_raw_gate_annotation_formatting(self, doc_dict):
    """Check the ``annotation_sets`` section of a raw/GATE export.

    ``doc_dict`` is the exported document dictionary. It must contain
    exactly four annotation sets. The set keyed ``existing_annotator1``
    must still carry its ``sentiment`` feature with value ``"positive"``.
    Every other set must hold exactly one annotation whose ``features``
    dict directly contains the ``text1`` and ``checkbox1`` labels.
    """
    self.assertIn("annotation_sets", doc_dict)
    annotation_sets = doc_dict["annotation_sets"]
    self.assertEqual(len(annotation_sets), 4)

    # Test annotation formatting
    for aset_key, aset_data in annotation_sets.items():
        if aset_key == "existing_annotator1":
            # Check that existing annotation from document upload is carried over
            self.assertEqual(aset_data["annotations"][0]["features"]["sentiment"], "positive")
            continue

        self.assertIn("name", aset_data)
        self.assertIn("annotations", aset_data)
        self.assertEqual(len(aset_data["annotations"]), 1)

        anno_dict = aset_data["annotations"][0]
        for required_key in ("type", "start", "end", "id", "features"):
            self.assertIn(required_key, anno_dict)

        # Labels are read straight from "features"; there is no "label" wrapper
        features_dict = anno_dict["features"]
        self.assertIn("text1", features_dict)
        self.assertIn("checkbox1", features_dict)




def test_export_csv(self):

Expand All @@ -1222,11 +1288,14 @@ def test_export_csv(self):
self.assertTrue("feature2" in doc_dict)
self.assertTrue("feature3" in doc_dict)
self.assertTrue("annotations" in doc_dict)
self.assertTrue(len(doc_dict["annotations"]) == 3)
self.assertEqual(len(doc_dict["annotations"]), 4)
anno_set_dict = doc_dict["annotations"]
for set_key in anno_set_dict:
self.assertTrue(isinstance(anno_set_dict[set_key]["text1"], str))
self.assertTrue(isinstance(anno_set_dict[set_key]["checkbox1"], str))
if set_key != "existing_annotator1":
self.assertTrue(isinstance(anno_set_dict[set_key]["text1"], str))
self.assertTrue(isinstance(anno_set_dict[set_key]["checkbox1"], str))
else:
self.assertEqual(anno_set_dict[set_key]["sentiment"], "positive")

def test_export_raw_anonymized(self):

Expand Down

0 comments on commit 101acd2

Please sign in to comment.