HotpotQA

`get_input_data_model()`

Returns HotpotQA input data model for pipeline configurations.

Returns:

Type	Description
`DataModel`	The HotpotQA input data model

Source code in synalinks/src/datasets/hotpotqa.py

@synalinks_export("synalinks.datasets.hotpotqa.get_input_data_model")
def get_input_data_model():
    """
    Get the data model describing the inputs of a HotpotQA pipeline.

    The model is returned as-is (not instantiated), so it can be handed
    directly to a pipeline configuration.

    Returns:
        (DataModel): The `Question` data model
    """
    return Question

`get_knowledge_data_model()`

Returns HotpotQA knowledge data model for pipeline configurations.

Returns:

Type	Description
`DataModel`	The HotpotQA knowledge data model

Source code in synalinks/src/datasets/hotpotqa.py

@synalinks_export("synalinks.datasets.hotpotqa.get_knowledge_data_model")
def get_knowledge_data_model():
    """
    Get the data model describing the knowledge entries of a HotpotQA pipeline.

    The model is returned as-is (not instantiated), so it can be handed
    directly to a pipeline configuration.

    Returns:
        (DataModel): The `Document` data model
    """
    return Document

`get_output_data_model()`

Returns HotpotQA output data model for pipeline configurations.

Returns:

Type	Description
`DataModel`	The HotpotQA output data model

Source code in synalinks/src/datasets/hotpotqa.py

@synalinks_export("synalinks.datasets.hotpotqa.get_output_data_model")
def get_output_data_model():
    """
    Get the data model describing the outputs of a HotpotQA pipeline.

    The model is returned as-is (not instantiated), so it can be handed
    directly to a pipeline configuration.

    Returns:
        (DataModel): The `Answer` data model
    """
    return Answer

`load_data()`

Load and format data from HuggingFace

Example:

(x_train, y_train), (x_test, y_test) = synalinks.datasets.hotpotqa.load_data()

Returns:

Type	Description
`tuple`	The train and test data ready for training

Source code in synalinks/src/datasets/hotpotqa.py

@synalinks_export("synalinks.datasets.hotpotqa.load_data")
def load_data():
    """
    Fetch the HotpotQA `fullwiki` splits from HuggingFace and format them.

    Every example of the `train` split is kept for training, while only the
    examples of the `validation` split whose `level` is `"hard"` are kept
    for testing.

    Example:

    ```python
    (x_train, y_train), (x_test, y_test) = synalinks.datasets.hotpotqa.load_data()
    ```

    Returns:
        (tuple): The `(x_train, y_train), (x_test, y_test)` pairs, each made
            of an object array of questions and an object array of answers
    """

    def _to_arrays(examples):
        # Build questions and answers in lockstep so index i of one array
        # always matches index i of the other.
        questions, answers = [], []
        for example in examples:
            questions.append(Question(question=example["question"]))
            answers.append(Answer(answer=example["answer"]))
        return (
            np.array(questions, dtype="object"),
            np.array(answers, dtype="object"),
        )

    train_split = load_dataset(
        "hotpot_qa", "fullwiki", split="train", trust_remote_code=True
    )
    validation_split = load_dataset(
        "hotpot_qa", "fullwiki", split="validation", trust_remote_code=True
    )

    # Only the hard validation examples are used for evaluation.
    hard_examples = (
        example for example in validation_split if example["level"] == "hard"
    )

    return _to_arrays(train_split), _to_arrays(hard_examples)

`load_knowledge()`

Load and format data from HuggingFace

Example:

knowledge = synalinks.datasets.hotpotqa.load_knowledge()

Returns:

Type	Description
`np.ndarray`	The documents, as an object array, ready for knowledge ingestion

Source code in synalinks/src/datasets/hotpotqa.py

@synalinks_export("synalinks.datasets.hotpotqa.load_knowledge")
def load_knowledge():
    """
    Load the HotpotQA context paragraphs from HuggingFace as documents

    Every paragraph attached to an example of the `fullwiki` train split
    becomes one `Document` whose text is the paragraph's sentences joined
    with newlines. Examples with a missing or empty context are skipped.

    Example:

    ```python
    knowledge = synalinks.datasets.hotpotqa.load_knowledge()
    ```

    Returns:
        (np.ndarray): The documents, as an object array, ready for
            knowledge ingestion
    """
    train_examples = load_dataset(
        "hotpot_qa", "fullwiki", split="train", trust_remote_code=True
    )

    documents = []
    for raw_example in train_examples:
        context = raw_example.get("context")
        if not context:
            continue
        # `title` and `sentences` are parallel lists: entry i of each
        # describes the same paragraph, so walk them together.
        documents.extend(
            Document(title=title, text="\n".join(sentences))
            for title, sentences in zip(context["title"], context["sentences"])
        )

    return np.array(documents, dtype="object")