From 9b5d4fba2f9bc518c6d7d35de30b5e3e340b65b7 Mon Sep 17 00:00:00 2001
From: Nathan Habib
Date: Tue, 25 Mar 2025 17:02:13 +0000
Subject: [PATCH 1/3] add arc agi 2

---
 src/lighteval/tasks/default_prompts.py | 31 ++++++++++++++++++++++++++
 src/lighteval/tasks/default_tasks.py   | 16 +++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/src/lighteval/tasks/default_prompts.py b/src/lighteval/tasks/default_prompts.py
index 3745a7724..4784bb65b 100644
--- a/src/lighteval/tasks/default_prompts.py
+++ b/src/lighteval/tasks/default_prompts.py
@@ -90,6 +90,37 @@ def apps(line, task_name: str = None):
     )
 
 
+def arc_agi_2(line, task_name: str = None):
+    query = """You are participating in a puzzle solving competition. You are an expert at solving puzzles.
+
+Below is a list of input and output pairs with a pattern. Your goal is to identify the pattern or transformation in the training examples that maps the input to the output, then apply that pattern to the test input to give a final output.
+
+Respond in the format of the training output examples
+
+--Training Examples--
+{training_examples}
+--End of Training Examples--
+
+--Test Input--
+{test_input}
+--End of Test Input--
+
+Your response:""".strip()
+
+    training_examples = line["fewshots"]
+    test_input = line["question"][0]["input"]
+
+    gold = str(line["question"][0]["output"])
+    query = query.format(training_examples=training_examples, test_input=test_input)
+
+    return Doc(
+        task_name=task_name,
+        query=query,
+        choices=[gold],
+        gold_index=0,
+    )
+
+
 def arc(line, task_name: str = None):
     return Doc(
         task_name=task_name,
diff --git a/src/lighteval/tasks/default_tasks.py b/src/lighteval/tasks/default_tasks.py
index 16a83701d..090d4a292 100644
--- a/src/lighteval/tasks/default_tasks.py
+++ b/src/lighteval/tasks/default_tasks.py
@@ -436,6 +436,22 @@
     trust_dataset=True,
     version=0,
 )
+arc_agi_2 = LightevalTaskConfig(
+    name="arc_agi_2",
+    suite=["lighteval"],
+    prompt_function=prompt.arc_agi_2,
+    hf_repo="arc-agi-community/arc-agi-2",
+    hf_subset="default",
+    hf_avail_splits=["train", "test"],
+    evaluation_splits=["test"],
+    few_shots_split=None,
+    few_shots_select=None,
+    generation_size=2048,
+    metric=[Metrics.exact_match],
+    stop_sequence=None,
+    trust_dataset=False,
+    version=0,
+)
 arc_c_letters_original = LightevalTaskConfig(
     name="arc:c:letters",
     suite=["original", "arc"],

From 7348cb5093775e453505991c5c8f75f698e1b9aa Mon Sep 17 00:00:00 2001
From: Nathan Habib
Date: Tue, 25 Mar 2025 17:05:53 +0000
Subject: [PATCH 2/3] add arc agi 2

---
Review notes (not part of the commit message; ignored when the patch is applied):
  * `gold` stays `str(line["question"][0]["output"])`, i.e. the repr of the
    nested list, while the prompt now shows grids one JSON row per line and
    asks for answers "in the format of the training output examples". With
    Metrics.exact_match this looks like it cannot match a well-formatted
    answer - TODO confirm against the metric's normalisation and consider
    building gold with convert_2d_list_to_string as well.
  * convert_2d_list_to_string calls json.dumps and this series adds no
    `import json` - confirm default_prompts.py already imports it.
  * `pair.input` / `pair.output` are replaced by `pair["input"]` /
    `pair["output"]` in PATCH 3/3; if the few-shot pairs are plain dicts this
    commit fails on its own, so squashing 3/3 into it would keep the history
    bisectable.

 src/lighteval/tasks/default_prompts.py | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/src/lighteval/tasks/default_prompts.py b/src/lighteval/tasks/default_prompts.py
index 4784bb65b..31e339acc 100644
--- a/src/lighteval/tasks/default_prompts.py
+++ b/src/lighteval/tasks/default_prompts.py
@@ -91,6 +91,19 @@ def apps(line, task_name: str = None):
 
 
 def arc_agi_2(line, task_name: str = None):
+    # query from: https://github.com/arcprize/model_baseline/blob/main/src/prompts/system_prompt.txt
+    def convert_2d_list_to_string(list_of_lists: list[list[int]]) -> str:
+        """
+        Convert a list of lists to a string
+        """
+
+        string_list = ""
+
+        for row in list_of_lists:
+            string_list += json.dumps(row) + "\n"
+
+        return string_list
+
     query = """You are participating in a puzzle solving competition. You are an expert at solving puzzles.
 
 Below is a list of input and output pairs with a pattern. Your goal is to identify the pattern or transformation in the training examples that maps the input to the output, then apply that pattern to the test input to give a final output.
@@ -107,8 +120,15 @@ def arc_agi_2(line, task_name: str = None):
 
 Your response:""".strip()
 
-    training_examples = line["fewshots"]
-    test_input = line["question"][0]["input"]
+    training_pairs = line["fewshots"]
+    training_examples = ""
+    for i, pair in enumerate(training_pairs):
+        training_examples += f"--Example {i}-- \n\n INPUT: \n\n"
+        training_examples += convert_2d_list_to_string(pair.input) + "\n\n"
+        training_examples += "OUTPUT: \n\n"
+        training_examples += convert_2d_list_to_string(pair.output) + "\n\n"
+
+    test_input = convert_2d_list_to_string(line["question"][0]["input"])
 
     gold = str(line["question"][0]["output"])
     query = query.format(training_examples=training_examples, test_input=test_input)

From 922abb8a19bfd25470c9b73615681abdf30a0c98 Mon Sep 17 00:00:00 2001
From: Nathan Habib
Date: Tue, 25 Mar 2025 17:16:01 +0000
Subject: [PATCH 3/3] add arc agi 2

---
 src/lighteval/tasks/default_prompts.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/lighteval/tasks/default_prompts.py b/src/lighteval/tasks/default_prompts.py
index 31e339acc..a50f334c4 100644
--- a/src/lighteval/tasks/default_prompts.py
+++ b/src/lighteval/tasks/default_prompts.py
@@ -124,9 +124,9 @@ def convert_2d_list_to_string(list_of_lists: list[list[int]]) -> str:
     training_examples = ""
     for i, pair in enumerate(training_pairs):
         training_examples += f"--Example {i}-- \n\n INPUT: \n\n"
-        training_examples += convert_2d_list_to_string(pair.input) + "\n\n"
+        training_examples += convert_2d_list_to_string(pair["input"]) + "\n\n"
         training_examples += "OUTPUT: \n\n"
-        training_examples += convert_2d_list_to_string(pair.output) + "\n\n"
+        training_examples += convert_2d_list_to_string(pair["output"]) + "\n\n"
 
     test_input = convert_2d_list_to_string(line["question"][0]["input"])
 