def arc_agi_2(line, task_name: str = None):
    """Build the ARC-AGI-2 prompt and reference answer for one dataset row.

    Prompt wording taken from:
    https://github.com/arcprize/model_baseline/blob/main/src/prompts/system_prompt.txt

    Args:
        line: Dataset row. ``line["fewshots"]`` is iterated as demonstration
            pairs, each providing an ``"input"`` and an ``"output"`` grid.
            ``line["question"][0]`` provides the test ``"input"`` grid and its
            expected ``"output"`` grid; only this first question is used.
        task_name: Name forwarded unchanged to the returned ``Doc``.

    Returns:
        A ``Doc`` whose ``query`` is the filled-in prompt and whose single
        choice (``gold_index=0``) is the expected output grid rendered one
        JSON row per line, i.e. the same layout used for every output grid
        shown inside the prompt.

    Raises:
        KeyError / IndexError: if ``line`` lacks the fields described above.
    """

    def convert_2d_list_to_string(list_of_lists: list[list[int]]) -> str:
        """Render a grid as one JSON-encoded row per line, each newline-terminated."""
        return "".join(json.dumps(row) + "\n" for row in list_of_lists)

    query = """You are participating in a puzzle solving competition. You are an expert at solving puzzles.

Below is a list of input and output pairs with a pattern. Your goal is to identify the pattern or transformation in the training examples that maps the input to the output, then apply that pattern to the test input to give a final output.

Respond in the format of the training output examples

--Training Examples--
{training_examples}
--End of Training Examples--

--Test Input--
{test_input}
--End of Test Input--

Your response:""".strip()

    # The spacing inside the example headers (trailing spaces, the space before
    # "INPUT:") is part of the prompt text and is kept verbatim.
    training_examples = "".join(
        f"--Example {i}-- \n\n INPUT: \n\n"
        + convert_2d_list_to_string(pair["input"])
        + "\n\n"
        + "OUTPUT: \n\n"
        + convert_2d_list_to_string(pair["output"])
        + "\n\n"
        for i, pair in enumerate(line["fewshots"])
    )

    test_case = line["question"][0]
    test_input = convert_2d_list_to_string(test_case["input"])

    # The prompt tells the model to answer in the format of the training
    # outputs, so the reference must use that same row-per-line rendering.
    # str() on the nested list yields a single-line "[[...], [...]]", which a
    # response in the requested format cannot equal under exact match.
    # The trailing newline is dropped so the reference ends with the last row.
    gold = convert_2d_list_to_string(test_case["output"]).rstrip("\n")

    query = query.format(training_examples=training_examples, test_input=test_input)

    return Doc(
        task_name=task_name,
        query=query,
        choices=[gold],
        gold_index=0,
    )