# run_mirage.py

import os
import subprocess
import glob
import argparse
from utils.pipeline_helpers import run_command

def main():
    """Convert the datasets if needed, then run Mirage on every input file.

    Command-line options (all optional, see the defaults below):
        --config     Path to the Mirage config file.
        --cci        CCI value passed to mirage.py.
        --cti        CTI value passed to mirage.py.
        --xor_dir    Path to the XOR-AttriQA directory.
        --eli5_path  Path to the ELI5 dataset.

    Workflow:
        1. If ``data_input_with_ans`` does not exist, create it and run
           ``to_mirage_format.py`` for the ELI5 and XOR-AttriQA datasets.
        2. Run ``mirage.py`` on every ``*.json`` file in that directory.
        3. For files whose name contains "eli5" (case-insensitive), also run
           ``run_eli5_eval.py`` on the corresponding Mirage result.

    A non-zero return code from any command is reported on stdout; the
    remaining work is still attempted.
    """
    parser = argparse.ArgumentParser(description="Run Mirage and ELI5 evaluation scripts")
    parser.add_argument("--config", type=str, default="mirage_configs/mistral.yaml", help="Path to Mirage config file")
    parser.add_argument("--cci", type=int, default=-5, help="CCI value for Mirage")
    parser.add_argument("--cti", type=int, default=1, help="CTI value for Mirage")
    parser.add_argument("--xor_dir", type=str, default="./xor_attriqa/in-language", help="Path to XOR-AttriQA directory")
    parser.add_argument("--eli5_path", type=str, default="data/eli5_eval_bm25_top100_reranked_oracle.json", help="Path to ELI5 dataset")
    args = parser.parse_args()

    input_dir = 'data_input_with_ans'

    # The conversion step only runs when the input directory is missing, so an
    # existing directory is treated as "datasets already converted".
    if not os.path.exists(input_dir):
        os.makedirs(input_dir)
        print(f"Created directory: {input_dir}")

        conversions = (
            ("ELI5", f"python to_mirage_format.py eli5 {args.eli5_path}"),
            ("XOR-AttriQA", f"python to_mirage_format.py xorattriqa {args.xor_dir}"),
        )
        for label, command in conversions:
            print(f"Converting {label} dataset...")
            if run_command(command) != 0:
                # The directory now exists, so a later run would skip the
                # conversion; tell the user how to force a retry.
                print(f"Error converting {label} dataset; delete {input_dir} to retry the conversion")

    # Sort so files are processed in a reproducible order (glob order is arbitrary).
    json_files = sorted(glob.glob(f'{input_dir}/*.json'))
    if not json_files:
        print(f"No JSON files found in {input_dir}")

    for file_path in json_files:
        file_name = os.path.basename(file_path)
        print(f"Processing file: {file_name}")

        # Run mirage.py with the provided config, CCI, and CTI values
        mirage_command = f"python mirage.py --f {input_dir}/{file_name} --config {args.config} --f_with_ans --CCI {args.cci} --CTI {args.cti}"
        return_code = run_command(mirage_command)

        if return_code != 0:
            print(f"Error processing {file_name} with mirage.py")
            continue

        # ELI5 inputs get an additional evaluation pass on the Mirage result.
        if 'eli5' in file_name.lower():
            # NOTE(review): this result path presumably mirrors the name
            # mirage.py gives its output under AA_res/ - confirm against mirage.py.
            result_stem = file_name.split('.json')[0]
            eli5_command = f"python run_eli5_eval.py --f AA_res/{result_stem}.mirage_CTI_{args.cti}_CCI_{args.cci} --citations --claims_nli"
            return_code = run_command(eli5_command)

            if return_code != 0:
                print(f"Error processing {file_name} with run_eli5_eval.py")

        print(f"Finished processing {file_name}")
        print("-" * 50)

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()