Skip to content

add arc agi 2 #642

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions src/lighteval/tasks/default_prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,57 @@ def apps(line, task_name: str = None):
)


def arc_agi_2(line, task_name: str = None):
    """Build the ARC-AGI-2 prompt document for a single dataset row.

    The prompt wording is taken from:
    https://github.com/arcprize/model_baseline/blob/main/src/prompts/system_prompt.txt

    Args:
        line: Dataset row. ``line["fewshots"]`` is iterated as the training
            pairs (each providing ``"input"`` and ``"output"`` grids), and
            ``line["question"][0]`` provides the test ``"input"`` and
            ``"output"`` grids.
        task_name: Passed through unchanged to the returned ``Doc``.

    Returns:
        A ``Doc`` whose ``query`` is the filled-in prompt and whose only
        choice (``gold_index=0``) is ``str()`` of the expected output grid.
    """

    def _grid_to_text(grid: list[list[int]]) -> str:
        """Serialize every row with ``json.dumps``, one row per line."""
        return "".join(json.dumps(row) + "\n" for row in grid)

    prompt_template = """You are participating in a puzzle solving competition. You are an expert at solving puzzles.

Below is a list of input and output pairs with a pattern. Your goal is to identify the pattern or transformation in the training examples that maps the input to the output, then apply that pattern to the test input to give a final output.

Respond in the format of the training output examples

--Training Examples--
{training_examples}
--End of Training Examples--

--Test Input--
{test_input}
--End of Test Input--

Your response:""".strip()

    # One text section per training pair; examples are numbered from 0.
    example_sections = (
        f"--Example {idx}-- \n\n INPUT: \n\n"
        + _grid_to_text(example["input"])
        + "\n\n"
        + "OUTPUT: \n\n"
        + _grid_to_text(example["output"])
        + "\n\n"
        for idx, example in enumerate(line["fewshots"])
    )
    rendered_examples = "".join(example_sections)

    test_case = line["question"][0]
    rendered_test_input = _grid_to_text(test_case["input"])

    # NOTE(review): the gold answer is rendered with str(), i.e. a single-line
    # nested-list repr, whereas the grids shown in the prompt are rendered one
    # JSON row per line -- confirm the metric tolerates this difference.
    expected = str(test_case["output"])

    return Doc(
        task_name=task_name,
        query=prompt_template.format(
            training_examples=rendered_examples,
            test_input=rendered_test_input,
        ),
        choices=[expected],
        gold_index=0,
    )


def arc(line, task_name: str = None):
return Doc(
task_name=task_name,
Expand Down
16 changes: 16 additions & 0 deletions src/lighteval/tasks/default_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,22 @@
trust_dataset=True,
version=0,
)
# Task configuration for the "arc_agi_2" task in the "lighteval" suite.
# Prompts are built by prompt.arc_agi_2 from the "default" subset of the
# "arc-agi-community/arc-agi-2" Hugging Face repo; only the "test" split is
# evaluated, no few-shot split or selection strategy is configured, and the
# only metric is Metrics.exact_match.
# NOTE(review): generation_size=2048 is presumably the generation length
# budget -- the value is questioned in the review thread; confirm it is enough.
arc_agi_2 = LightevalTaskConfig(
name="arc_agi_2",
suite=["lighteval"],
prompt_function=prompt.arc_agi_2,
hf_repo="arc-agi-community/arc-agi-2",
hf_subset="default",
hf_avail_splits=["train", "test"],
evaluation_splits=["test"],
few_shots_split=None,
few_shots_select=None,
generation_size=2048,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure about 2048?

metric=[Metrics.exact_match],
stop_sequence=None,
trust_dataset=False,
version=0,
)
arc_c_letters_original = LightevalTaskConfig(
name="arc:c:letters",
suite=["original", "arc"],
Expand Down