Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
saliency-based-citation/run_mirage.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
58 lines (46 sloc)
2.51 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import subprocess | |
import glob | |
import argparse | |
from utils.pipeline_helpers import run_command | |
def main():
    """Convert the ELI5 and XOR-AttriQA datasets, then run Mirage on each file.

    Steps, in order:

    1. Parse ``--config``, ``--cci``, ``--cti``, ``--xor_dir`` and
       ``--eli5_path`` from the command line.
    2. Make sure the ``data_input_with_ans`` directory exists.
    3. Invoke ``to_mirage_format.py`` once for ELI5 and once for XOR-AttriQA.
    4. For every ``*.json`` file found in ``data_input_with_ans`` (in sorted
       order), invoke ``mirage.py``; for files whose name contains ``eli5``
       (case-insensitive), additionally invoke ``run_eli5_eval.py`` on the
       corresponding ``AA_res/...`` path.

    Every external command goes through ``run_command``. Its return value is
    compared against ``0``, so it is assumed to be the command's exit status
    (TODO confirm against ``utils.pipeline_helpers``). A failing command is
    reported on stdout and the run carries on with the remaining work.
    """
    parser = argparse.ArgumentParser(description="Run Mirage and ELI5 evaluation scripts")
    parser.add_argument("--config", type=str, default="mirage_configs/mistral.yaml", help="Path to Mirage config file")
    parser.add_argument("--cci", type=int, default=-5, help="CCI value for Mirage")
    parser.add_argument("--cti", type=int, default=1, help="CTI value for Mirage")
    parser.add_argument("--xor_dir", type=str, default="./xor_attriqa/in-language", help="Path to XOR-AttriQA directory")
    parser.add_argument("--eli5_path", type=str, default="data/eli5_eval_bm25_top100_reranked_oracle.json", help="Path to ELI5 dataset")
    args = parser.parse_args()

    input_dir = "data_input_with_ans"

    # Create the input directory without a separate existence check: the
    # check-then-create sequence can race with another process creating it.
    try:
        os.makedirs(input_dir)
    except FileExistsError:
        pass
    else:
        print(f"Created directory: {input_dir}")

    # Dataset conversions. A failed conversion is reported but does not abort
    # the run, so files already present in the input directory still get
    # processed.
    conversions = (
        ("ELI5", f"python to_mirage_format.py eli5 {args.eli5_path}"),
        ("XOR-AttriQA", f"python to_mirage_format.py xorattriqa {args.xor_dir}"),
    )
    for dataset_label, convert_command in conversions:
        print(f"Converting {dataset_label} dataset...")
        if run_command(convert_command) != 0:
            print(f"Error converting {dataset_label} dataset with to_mirage_format.py")

    # glob returns matches in arbitrary order; sort so that the processing
    # order (and therefore the log output) is reproducible between runs.
    json_files = sorted(glob.glob(f"{input_dir}/*.json"))
    if not json_files:
        print(f"No JSON files found in {input_dir}; nothing to process")

    for file_path in json_files:
        file_name = os.path.basename(file_path)
        print(f"Processing file: {file_name}")

        # Run mirage.py with the provided config, CCI, and CTI values.
        mirage_command = (
            f"python mirage.py --f {input_dir}/{file_name} --config {args.config} "
            f"--f_with_ans --CCI {args.cci} --CTI {args.cti}"
        )
        if run_command(mirage_command) != 0:
            print(f"Error processing {file_name} with mirage.py")
            continue

        # ELI5 files get an extra evaluation pass over the Mirage output.
        if "eli5" in file_name.lower():
            # Keep everything before the first ".json"; the AA_res path below
            # presumably mirrors how mirage.py names its output (not visible
            # here), so the stem logic is intentionally left as it was.
            stem = file_name.split(".json")[0]
            result_path = f"AA_res/{stem}.mirage_CTI_{args.cti}_CCI_{args.cci}"
            eval_command = f"python run_eli5_eval.py --f {result_path} --citations --claims_nli"
            if run_command(eval_command) != 0:
                print(f"Error processing {file_name} with run_eli5_eval.py")

        print(f"Finished processing {file_name}")
        print("-" * 50)


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()