Skip to content

ARC AGI

get_input_data_model()

Returns ARC-AGI input data model for pipeline configurations.

Returns:

Type Description
DataModel

The ARC-AGI input data model

Source code in synalinks/src/datasets/arcagi.py
@synalinks_export("synalinks.datasets.arcagi.get_input_data_model")
def get_input_data_model():
    """
    Provide the data model that describes ARC-AGI pipeline inputs.

    Returns:
        (DataModel): The `ARCAGIInput` data model, returned as-is
            (it is not instantiated here).
    """
    input_data_model = ARCAGIInput
    return input_data_model

get_output_data_model()

Returns ARC-AGI output data model for pipeline configurations.

Returns:

Type Description
DataModel

The ARC-AGI output data model

Source code in synalinks/src/datasets/arcagi.py
@synalinks_export("synalinks.datasets.arcagi.get_output_data_model")
def get_output_data_model():
    """
    Provide the data model that describes ARC-AGI pipeline outputs.

    Returns:
        (DataModel): The `ARCAGIOutput` data model, returned as-is
            (it is not instantiated here).
    """
    output_data_model = ARCAGIOutput
    return output_data_model

load_data(task_name, filepath=None, arc_version=1, one_leave_out=True, permutation=False, repeat=1, curriculum_learning=True)

Load task data by name.

Example:

(x_train, y_train), (x_test, y_test) = synalinks.datasets.arcagi.load_data(
    task_name="62c24649",
    arc_version=1,
)

Parameters:

Name Type Description Default
task_name str

The name of the task

required
filepath str

The task filepath

None
arc_version int

ARC-AGI version, either 1 or 2

1
one_leave_out bool

If True, create a training set using the one-leave-out technique.

True
permutation bool

If True augment the training data using permutation of examples.

False
repeat int

The number of times to repeat the training data.

1
curriculum_learning bool

Whether or not to sort the training set by difficulty. In this case, the difficulty refers to the grid size of the expected output. (Defaults to True)

True
Source code in synalinks/src/datasets/arcagi.py
@synalinks_export("synalinks.datasets.arcagi.load_data")
def load_data(
    task_name,
    filepath=None,
    arc_version=1,
    one_leave_out=True,
    permutation=False,
    repeat=1,
    curriculum_learning=True,
):
    """
    Load task data by name.

    Example:

    ```python
    (x_train, y_train), (x_test, y_test) = synalinks.datasets.arcagi.load_data(
        task_name="62c24649",
        arc_version=1,
    )
    ```

    Args:
        task_name (str): The name of the task
        filepath (str): The task filepath. When provided, the task is read
            from this JSON file and `task_name`/`arc_version` are not used.
        arc_version (int): ARC-AGI version, either 1 or 2
        one_leave_out (bool): If True, create a training set using the
            one-leave-out technique: each training pair is used in turn as the
            target while the remaining pairs are given as examples.
            If False, no training samples are generated.
        permutation (bool): If True, augment the training data using every
            permutation of the examples (only applies when `one_leave_out`
            is True).
        repeat (int): The number of times to repeat the training data.
        curriculum_learning (bool): Whether or not to sort the training set by
            difficulty. In this case, the difficulty refers to the grid size
            of the expected output. (Defaults to True)

    Returns:
        (tuple): `(x_train, y_train), (x_test, y_test)` where each element is
            a NumPy array of dtype `object` holding `ARCAGIInput` (x) or
            `ARCAGIOutput` (y) instances.

    Raises:
        ValueError: If the file at `filepath` cannot be read or parsed, if
            `task_name` is not a known task, or if `arc_version` is not
            1 or 2.
    """
    if filepath:
        try:
            with open(filepath, "r", encoding="utf-8") as f:
                json_data = json.load(f)
        except Exception as err:
            # Chain the original error so the real cause (missing file,
            # malformed JSON, ...) stays visible in the traceback.
            raise ValueError(
                f"Could not find task data at '{filepath}', "
                "make sure the path is correct.",
            ) from err
    else:
        if arc_version == 1:
            if task_name in get_arcagi1_training_task_names():
                url = f"{ARCAGI1_BASE_URL}/training/{task_name}.json"
            elif task_name in get_arcagi1_evaluation_task_names():
                url = f"{ARCAGI1_BASE_URL}/evaluation/{task_name}.json"
            else:
                raise ValueError(
                    f"Task '{task_name}' not recognized, make sure that"
                    " the task name is valid."
                )
        elif arc_version == 2:
            if task_name in get_arcagi2_training_task_names():
                url = f"{ARCAGI2_BASE_URL}/training/{task_name}.json"
            elif task_name in get_arcagi2_evaluation_task_names():
                url = f"{ARCAGI2_BASE_URL}/evaluation/{task_name}.json"
            else:
                raise ValueError(
                    f"Task '{task_name}' not recognized, make sure that"
                    " the task name is valid."
                )
        else:
            raise ValueError("Invalid `arc_version` provided, should be 1 or 2")
        file_path = file_utils.get_file(origin=url, progbar=False)
        with open(file_path, "r", encoding="utf-8") as f:
            json_data = json.load(f)

    trainset = json_data.get("train")
    testset = json_data.get("test")

    def build_examples(indices):
        # Wrap the selected training pairs as demonstration examples.
        return [
            ARCAGITask(
                input_grid=trainset[j].get("input"),
                output_grid=trainset[j].get("output"),
            )
            for j in indices
        ]

    def build_sample(examples, pair):
        # Build one (inputs, outputs) sample whose target is `pair`.
        inputs = ARCAGIInput(
            examples=examples,
            input_grid=pair.get("input"),
        )
        outputs = ARCAGIOutput(
            output_grid=pair.get("output"),
        )
        return inputs, outputs

    x_train = []
    y_train = []
    x_test = []
    y_test = []

    if one_leave_out:
        for held_out in range(len(trainset)):
            other_examples = [j for j in range(len(trainset)) if j != held_out]
            if permutation:
                orderings = itertools.permutations(other_examples)
            else:
                orderings = [other_examples]
            for ordering in orderings:
                inputs, outputs = build_sample(
                    build_examples(ordering),
                    trainset[held_out],
                )
                # Use a throwaway loop variable: reusing the held-out index
                # here would make later permutations target the wrong pair.
                for _ in range(repeat):
                    x_train.append(inputs)
                    y_train.append(outputs)

    # Every test sample gets the full training set as examples.
    for test_pair in testset:
        inputs, outputs = build_sample(
            build_examples(range(len(trainset))),
            test_pair,
        )
        x_test.append(inputs)
        y_test.append(outputs)

    # Skip sorting when there is nothing to sort: unpacking `zip(*[])`
    # would otherwise raise on an empty training set.
    if curriculum_learning and x_train:

        def get_output_grid_size(y_example):
            # Difficulty proxy: number of cells in the expected output grid
            # (rows times the length of the first row); 0 when missing/empty.
            output_grid = y_example.output_grid
            if not output_grid:
                return 0
            return len(output_grid) * len(output_grid[0])

        # `sorted` is stable, so samples of equal size keep their order.
        training_pairs = sorted(
            zip(x_train, y_train),
            key=lambda pair: get_output_grid_size(pair[1]),
        )
        x_train = [pair[0] for pair in training_pairs]
        y_train = [pair[1] for pair in training_pairs]

    x_train = np.array(x_train, dtype="object")
    y_train = np.array(y_train, dtype="object")
    x_test = np.array(x_test, dtype="object")
    y_test = np.array(y_test, dtype="object")
    return (x_train, y_train), (x_test, y_test)