# Merge two span annotation JSON files.
#
# For each pair of corresponding items, the one whose "span_annotation_done"
# flag is set is kept; when neither is done, the item from the first file is
# used. It is an error for both files to mark the same item as done.
import json
import argparse
def load_json(path):
    """Read the JSON document stored at ``path`` and return the parsed data.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` for the file's content.
    """
    # Decode explicitly as UTF-8 so reading matches the encoding save_json
    # writes with, independent of the platform's default locale encoding.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
def save_json(path, data):
    """Serialize ``data`` as 4-space-indented JSON and write it to ``path``.

    Args:
        path: Destination file; it is created or overwritten.
        data: Object to serialize with ``json.dumps``.

    Raises:
        TypeError: If ``data`` contains a value ``json`` cannot serialize.
            In that case ``path`` is left untouched.
    """
    # Serialize before opening the file: opening in "w" mode truncates
    # immediately, so a serialization failure would otherwise wipe or
    # half-write an already existing output file.
    payload = json.dumps(data, indent=4)
    with open(path, "w", encoding="utf-8") as f:
        f.write(payload)
def _merge_annotations(ann_a, ann_b):
    """Combine two parallel annotation lists, keeping the finished item of each pair.

    Args:
        ann_a: Items from the first file; each is indexed by
            ``"span_annotation_done"``.
        ann_b: Items from the second file, paired with ``ann_a`` by position.

    Returns:
        A new list with one item per pair: the item from ``ann_b`` when only
        that one is done, otherwise the item from ``ann_a``.

    Raises:
        ValueError: If the inputs differ in length, or if both items of a
            pair are marked as done.
    """
    # zip() stops at the shorter input, which would silently drop trailing
    # items from the merged output; refuse mismatched inputs instead.
    if len(ann_a) != len(ann_b):
        raise ValueError(
            "Annotation files have different numbers of items: "
            f"{len(ann_a)} vs {len(ann_b)}"
        )

    merged = []
    for item_a, item_b in zip(ann_a, ann_b):
        a_done = item_a["span_annotation_done"]
        b_done = item_b["span_annotation_done"]
        if a_done and b_done:
            raise ValueError("Both annotations are done for the same item")
        # At most one side is done here. Take B only when it is the finished
        # one; A is used both when A is done and when neither is done.
        merged.append(item_b if b_done else item_a)
    return merged


def main():
    """Command-line entry point: merge two span annotation files into one.

    Reads the files given by ``--ann_path_a`` and ``--ann_path_b``, merges
    them item by item, writes the result to ``--output_path`` and prints how
    many merged items are annotated.

    Raises:
        ValueError: If the two files contain a different number of items, or
            if the same item is marked as done in both files.
    """
    parser = argparse.ArgumentParser(description='Merge two span annotation JSON files')
    parser.add_argument("--ann_path_a", type=str, help="Path to the first span annotations JSON file", required=True)
    parser.add_argument("--ann_path_b", type=str, help="Path to the second span annotations JSON file", required=True)
    parser.add_argument("--output_path", type=str, help="Path to the output merged span annotations JSON file", required=True)
    args = parser.parse_args()

    ann_a = load_json(args.ann_path_a)
    ann_b = load_json(args.ann_path_b)

    ann_merged = _merge_annotations(ann_a, ann_b)
    save_json(args.output_path, ann_merged)

    count_done = sum(1 for item in ann_merged if item["span_annotation_done"])
    print(f"Merge complete. {count_done} / {len(ann_merged)} items are annotated.")
# Run the merge only when this file is executed as a script, so importing it
# as a module does not trigger argument parsing or file I/O.
if __name__ == "__main__":
    main()