Skip to content

BBQ

get_input_data_model()

Returns BBQ input data model.

Source code in synalinks/src/datasets/built_in/bbq.py
@synalinks_export("synalinks.datasets.bbq.get_input_data_model")
def get_input_data_model():
    """
    Return the data model used for BBQ inputs.

    Returns:
        ``BBQQuestion``, handed back as-is (it is not instantiated here).
    """
    input_model = BBQQuestion
    return input_model

get_output_data_model()

Returns BBQ output data model.

Source code in synalinks/src/datasets/built_in/bbq.py
@synalinks_export("synalinks.datasets.bbq.get_output_data_model")
def get_output_data_model():
    """
    Return the data model used for BBQ outputs.

    Returns:
        ``BBQAnswer``, handed back as-is (it is not instantiated here).
    """
    output_model = BBQAnswer
    return output_model

iterable_dataset(repeat=1, batch_size=1, limit=None, category='age')

Streaming dataset for RL-style training.

Parameters:

Name Type Description Default
category str

BBQ category to stream ("age" by default).

'age'

Returns:

Type Description
HuggingFaceDataset

A streaming, iterable dataset.

Source code in synalinks/src/datasets/built_in/bbq.py
@synalinks_export("synalinks.datasets.bbq.iterable_dataset")
def iterable_dataset(repeat=1, batch_size=1, limit=None, category="age"):
    """
    Build a streaming BBQ dataset for RL-style training.

    The dataset is created from ``walledai/BBQ`` with ``streaming=True``;
    ``category`` is passed verbatim as the ``split`` argument.

    Args:
        repeat: Forwarded unchanged to ``HuggingFaceDataset``
            (default ``1``).
        batch_size: Forwarded unchanged to ``HuggingFaceDataset``
            (default ``1``).
        limit: Forwarded unchanged to ``HuggingFaceDataset``
            (default ``None``).
        category (str): BBQ category to stream (``"age"`` by default).

    Returns:
        (HuggingFaceDataset): A streaming, iterable dataset.
    """
    # Gather every constructor argument in one place, in the same order
    # the dataset is configured: source, templates/models, then iteration.
    dataset_options = dict(
        path="walledai/BBQ",
        split=category,
        streaming=True,
        input_data_model=BBQQuestion,
        input_template=_INPUT_TEMPLATE,
        output_data_model=BBQAnswer,
        output_template=_OUTPUT_TEMPLATE,
        batch_size=batch_size,
        limit=limit,
        repeat=repeat,
    )
    return HuggingFaceDataset(**dataset_options)

load_data(category='age', validation_split=0.2)

Load BBQ (Bias Benchmark for QA).

On the Hugging Face Hub, BBQ is split by category (age, gender_identity, race_ethnicity, religion, ses, ...) rather than into train / test splits. We load the requested category and then split it deterministically into train and test sets.

Parameters:

Name Type Description Default
category str

The BBQ category to load. Defaults to "age".

'age'
validation_split float

Fraction held out for evaluation (default 0.2).

0.2

Returns:

Type Description
tuple

(x_train, y_train), (x_test, y_test).

Source code in synalinks/src/datasets/built_in/bbq.py
@synalinks_export("synalinks.datasets.bbq.load_data")
def load_data(category="age", validation_split=0.2):
    """
    Load one category of BBQ (Bias Benchmark for QA) as train / test data.

    The ``walledai/BBQ`` dataset is organised by *category* (for example
    ``age``, ``religion``, ``ses``) instead of train / test splits. The
    requested category is passed verbatim as the ``split`` argument of
    ``load_split``, and the loaded examples are then divided into train
    and test portions by ``split_train_test``.

    Args:
        category (str): The BBQ category to load. Defaults to ``"age"``.
        validation_split (float): Fraction held out for evaluation
            (default ``0.2``); forwarded to ``split_train_test``.

    Returns:
        (tuple): ``(x_train, y_train), (x_test, y_test)``.
    """
    split_options = {
        "path": "walledai/BBQ",
        "split": category,
        "input_data_model": BBQQuestion,
        "input_template": _INPUT_TEMPLATE,
        "output_data_model": BBQAnswer,
        "output_template": _OUTPUT_TEMPLATE,
    }
    # Load the whole category first; the train / test partition happens
    # afterwards because the source dataset has no such splits.
    x_all, y_all = load_split(**split_options)
    return split_train_test(x_all, y_all, validation_split=validation_split)