From 9b5d4fba2f9bc518c6d7d35de30b5e3e340b65b7 Mon Sep 17 00:00:00 2001
From: Nathan Habib <nathan.habib@huggingface.co>
Date: Tue, 25 Mar 2025 17:02:13 +0000
Subject: [PATCH 1/3] Add ARC-AGI-2 task config and prompt function

---
 src/lighteval/tasks/default_prompts.py | 31 ++++++++++++++++++++++++++
 src/lighteval/tasks/default_tasks.py   | 16 +++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/src/lighteval/tasks/default_prompts.py b/src/lighteval/tasks/default_prompts.py
index 3745a7724..4784bb65b 100644
--- a/src/lighteval/tasks/default_prompts.py
+++ b/src/lighteval/tasks/default_prompts.py
@@ -90,6 +90,37 @@ def apps(line, task_name: str = None):
     )
 
 
+def arc_agi_2(line, task_name: str = None):
+    query = """You are participating in a puzzle solving competition. You are an expert at solving puzzles.
+
+Below is a list of input and output pairs with a pattern. Your goal is to identify the pattern or transformation in the training examples that maps the input to the output, then apply that pattern to the test input to give a final output.
+
+Respond in the format of the training output examples
+
+--Training Examples--
+{training_examples}
+--End of Training Examples--
+
+--Test Input--
+{test_input}
+--End of Test Input--
+
+Your response:""".strip()
+
+    training_examples = line["fewshots"]
+    test_input = line["question"][0]["input"]
+
+    gold = str(line["question"][0]["output"])
+    query = query.format(training_examples=training_examples, test_input=test_input)
+
+    return Doc(
+        task_name=task_name,
+        query=query,
+        choices=[gold],  # str() of the first question's expected output; NOTE(review): verify vs. prompt format
+        gold_index=0,  # the single entry in choices is the gold answer
+    )
+
+
 def arc(line, task_name: str = None):
     return Doc(
         task_name=task_name,
diff --git a/src/lighteval/tasks/default_tasks.py b/src/lighteval/tasks/default_tasks.py
index 16a83701d..090d4a292 100644
--- a/src/lighteval/tasks/default_tasks.py
+++ b/src/lighteval/tasks/default_tasks.py
@@ -436,6 +436,22 @@
     trust_dataset=True,
     version=0,
 )
+arc_agi_2 = LightevalTaskConfig(
+    name="arc_agi_2",
+    suite=["lighteval"],
+    prompt_function=prompt.arc_agi_2,  # builds the query and gold string from each dataset row
+    hf_repo="arc-agi-community/arc-agi-2",
+    hf_subset="default",
+    hf_avail_splits=["train", "test"],
+    evaluation_splits=["test"],
+    few_shots_split=None,  # None: demonstrations are read from each row's "fewshots" field by prompt.arc_agi_2
+    few_shots_select=None,
+    generation_size=2048,  # NOTE(review): presumably the generation length cap; confirm large grids fit
+    metric=[Metrics.exact_match],  # NOTE(review): presumably compares the generation with the gold string
+    stop_sequence=None,
+    trust_dataset=False,
+    version=0,
+)
 arc_c_letters_original = LightevalTaskConfig(
     name="arc:c:letters",
     suite=["original", "arc"],

From 7348cb5093775e453505991c5c8f75f698e1b9aa Mon Sep 17 00:00:00 2001
From: Nathan Habib <nathan.habib@huggingface.co>
Date: Tue, 25 Mar 2025 17:05:53 +0000
Subject: [PATCH 2/3] Render ARC-AGI-2 grids and training pairs as text in the prompt

---
 src/lighteval/tasks/default_prompts.py | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/src/lighteval/tasks/default_prompts.py b/src/lighteval/tasks/default_prompts.py
index 4784bb65b..31e339acc 100644
--- a/src/lighteval/tasks/default_prompts.py
+++ b/src/lighteval/tasks/default_prompts.py
@@ -91,6 +91,19 @@ def apps(line, task_name: str = None):
 
 
 def arc_agi_2(line, task_name: str = None):
+    # query from: https://github.com/arcprize/model_baseline/blob/main/src/prompts/system_prompt.txt
+    def convert_2d_list_to_string(list_of_lists: list[list[int]]) -> str:
+        """
+        Convert a list of lists to a string
+        """
+
+        string_list = ""
+
+        for row in list_of_lists:
+            string_list += json.dumps(row) + "\n"
+
+        return string_list
+
     query = """You are participating in a puzzle solving competition. You are an expert at solving puzzles.
 
 Below is a list of input and output pairs with a pattern. Your goal is to identify the pattern or transformation in the training examples that maps the input to the output, then apply that pattern to the test input to give a final output.
@@ -107,8 +120,15 @@ def arc_agi_2(line, task_name: str = None):
 
 Your response:""".strip()
 
-    training_examples = line["fewshots"]
-    test_input = line["question"][0]["input"]
+    training_pairs = line["fewshots"]
+    training_examples = ""
+    for i, pair in enumerate(training_pairs):
+        training_examples += f"--Example {i}-- \n\n INPUT: \n\n"
+        training_examples += convert_2d_list_to_string(pair.input) + "\n\n"
+        training_examples += "OUTPUT: \n\n"
+        training_examples += convert_2d_list_to_string(pair.output) + "\n\n"
+
+    test_input = convert_2d_list_to_string(line["question"][0]["input"])
 
     gold = str(line["question"][0]["output"])
     query = query.format(training_examples=training_examples, test_input=test_input)

From 922abb8a19bfd25470c9b73615681abdf30a0c98 Mon Sep 17 00:00:00 2001
From: Nathan Habib <nathan.habib@huggingface.co>
Date: Tue, 25 Mar 2025 17:16:01 +0000
Subject: [PATCH 3/3] Access ARC-AGI-2 few-shot pairs by key instead of attribute

---
 src/lighteval/tasks/default_prompts.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/lighteval/tasks/default_prompts.py b/src/lighteval/tasks/default_prompts.py
index 31e339acc..a50f334c4 100644
--- a/src/lighteval/tasks/default_prompts.py
+++ b/src/lighteval/tasks/default_prompts.py
@@ -124,9 +124,9 @@ def convert_2d_list_to_string(list_of_lists: list[list[int]]) -> str:
     training_examples = ""
     for i, pair in enumerate(training_pairs):
         training_examples += f"--Example {i}-- \n\n INPUT: \n\n"
-        training_examples += convert_2d_list_to_string(pair.input) + "\n\n"
+        training_examples += convert_2d_list_to_string(pair["input"]) + "\n\n"
         training_examples += "OUTPUT: \n\n"
-        training_examples += convert_2d_list_to_string(pair.output) + "\n\n"
+        training_examples += convert_2d_list_to_string(pair["output"]) + "\n\n"
 
     test_input = convert_2d_list_to_string(line["question"][0]["input"])