Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
saliency-based-citation/merge_annotations.py
Go to file
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
42 lines (35 sloc)
1.55 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import argparse | |
def load_json(path):
    """Read the JSON file at ``path`` and return the decoded object.

    The file is opened explicitly as UTF-8 so the result does not depend on
    the platform's default text encoding; ``save_json`` in this file writes
    UTF-8 as well.

    Args:
        path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` decodes from the file.
    """
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
def save_json(path, data):
    """Serialize ``data`` as JSON and write it to ``path``.

    The file is opened in write mode (replacing any existing content) as
    UTF-8, and the JSON is pretty-printed with a 4-space indent.

    Args:
        path: Destination file path.
        data: Any object ``json.dump`` can serialize.
    """
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4)
def _merge_annotations(ann_a, ann_b):
    """Merge two parallel annotation sequences item by item.

    For each position, the item whose ``"span_annotation_done"`` value is
    truthy is kept. When neither item is done, the item from ``ann_a`` is kept.

    Args:
        ann_a: First sequence of annotation items (assumed to be dicts that
            contain a ``"span_annotation_done"`` key).
        ann_b: Second sequence, positionally aligned with ``ann_a``.

    Returns:
        A new list with one item per position.

    Raises:
        ValueError: If the sequences differ in length, or if both items at
            the same position are marked done.
    """
    # zip() stops at the shorter input, which would silently drop trailing
    # items; refuse to merge misaligned inputs instead.
    if len(ann_a) != len(ann_b):
        raise ValueError(
            f"Annotation files have different lengths: {len(ann_a)} vs {len(ann_b)}"
        )

    merged = []
    for idx, (item_a, item_b) in enumerate(zip(ann_a, ann_b)):
        a_done = item_a["span_annotation_done"]
        b_done = item_b["span_annotation_done"]
        if a_done and b_done:
            raise ValueError(
                f"Both annotations are done for the same item (index {idx})"
            )
        # Past the check above, b_done implies a is not done. In every other
        # case (only a done, or neither done) the first file's item is kept.
        merged.append(item_b if b_done else item_a)
    return merged


def main():
    """Command-line entry point: merge two span annotation JSON files.

    Reads the files given by ``--ann_path_a`` and ``--ann_path_b``, merges
    them position by position, writes the result to ``--output_path`` and
    prints how many merged items are marked as annotated.

    Raises:
        ValueError: If the two files hold a different number of items, or if
            both files mark the same item as done.
    """
    parser = argparse.ArgumentParser(description='Merge two span annotation JSON files')
    parser.add_argument("--ann_path_a", type=str, help="Path to the first span annotations JSON file", required=True)
    parser.add_argument("--ann_path_b", type=str, help="Path to the second span annotations JSON file", required=True)
    parser.add_argument("--output_path", type=str, help="Path to the output merged span annotations JSON file", required=True)
    args = parser.parse_args()

    ann_a = load_json(args.ann_path_a)
    ann_b = load_json(args.ann_path_b)
    ann_merged = _merge_annotations(ann_a, ann_b)
    save_json(args.output_path, ann_merged)

    count_done = sum(1 for item in ann_merged if item["span_annotation_done"])
    print(f"Merge complete. {count_done} / {len(ann_merged)} items are annotated.")
# Run the merge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()