FewShotOPRO

Bases: Optimizer

Sample randomly among the best examples to populate the LM's prompt, so that the model learns via few-shot learning, while generating instructions with OPRO.

Example:

```python
import synalinks
import asyncio

async def main():
    # ... your program definition

    program.compile(
        reward=synalinks.rewards.ExactMatch(),
        optimizer=synalinks.optimizers.FewShotOPRO(
            language_model=language_model,
            k=3, # The number of examples to provide to the prompt
            k_best=10, # The number of best examples to select from
        ),
    )

    history = await program.fit(...)
```

References

- [Language Models are Few-Shot Learners](https://arxiv.org/abs/2005.14165)
- [Large Language Models as Optimizers](https://arxiv.org/abs/2309.03409)

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `language_model` | `LanguageModel` | The language model to use. | `None` |
| `k` | `int` | The number of examples to select among the best predictions. | `3` |
| `k_best` | `int` | The maximum number of best predictions/instructions to select from. | `10` |
| `program` | `Program` | The program to use. Optional. If `None`, one is created at the start. | `None` |
| `name` | `str` | The name of the optimizer. | `None` |
| `description` | `str` | The description of the optimizer. | `None` |
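
To make the interaction between `k` and `k_best` concrete, here is a small standalone sketch of the selection performed at each optimization step, mirroring the logic of `optimize()` below; the prediction records are simplified to plain dictionaries, and the field names follow the source code on this page.

```python
import random

# Prediction records as accumulated in the trainable variable (simplified).
predictions = [
    {"inputs": "2 + 2", "outputs": "4", "reward": 1.0},
    {"inputs": "3 * 3", "outputs": "6", "reward": 0.0},
    {"inputs": "5 - 1", "outputs": "4", "reward": 1.0},
    {"inputs": "7 + 1", "outputs": "8", "reward": None},  # not yet rewarded
]

k = 2       # number of examples injected into the prompt
k_best = 3  # size of the pool of best predictions to sample from

# Sort by reward (missing rewards rank last) and keep the top k_best.
sorted_predictions = sorted(
    predictions,
    key=lambda p: p["reward"] if p["reward"] is not None else float("-inf"),
    reverse=True,
)
top_k_best = sorted_predictions[:k_best]

# Randomly sample k few-shot examples from that pool.
examples = random.sample(top_k_best, k) if len(top_k_best) > k else top_k_best
print(examples)
```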
Source code in synalinks/src/optimizers/few_shot_opro.py
@synalinks_export("synalinks.optimizers.FewShotOPRO")
class FewShotOPRO(Optimizer):
    """Sample randomly among the best examples to populate the LM's prompt to make it
        learn using Few Shot Learning while generating instructions with OPRO.

    Example:

    ```python
    import synalinks
    import asyncio

    async def main():
        # ... your program definition

        program.compile(
            reward=synalinks.rewards.ExactMatch(),
            optimizer=synalinks.optimizers.FewShotOPRO(
                language_model=language_model,
                k=3, # The number of examples to provide to the prompt
                k_best=10, # The number of best examples to select from
            ),
        )

        history = await program.fit(...)
    ```

    References:
        - [Language Models are Few-Shot Learners](https://arxiv.org/abs/2005.14165)
        - [Large Language Models as Optimizers](https://arxiv.org/abs/2309.03409)

    Args:
        language_model (LanguageModel): The language model to use.
        k (int): The number of examples to select (default 3) among the best predictions.
        k_best (int): The max number of best predictions/instructions to select from
            (default 10).
        program (Program): The program to use. Optional. If None, one is created
            at the start.
        name (str): The name of the optimizer.
        description (str): The description of the optimizer.
    """

    def __init__(
        self,
        language_model=None,
        k=3,
        k_best=10,
        program=None,
        name=None,
        description=None,
    ):
        super().__init__(
            name=name,
            description=description,
            data_model=FewShotOPROOptimizedVariables,
        )
        self.language_model = language_model
        self.k = k
        self.k_best = k_best
        self.program = program

    async def build(self, variables):
        if not self.program:
            opro_inputs = Input(data_model=OPROInputs)
            opro_outputs = await Generator(
                language_model=self.language_model,
                data_model=Instructions,
                instructions=[
                    "Your task is to generate instructions that maximize rewards.",
                    "Below are some previous instructions with their reward.",
                    "Generate instructions that is different from all the instructions.",
                    (
                        "The instructions should be concise, effective and generally"
                        " applicable to all predictions below."
                    ),
                ],
            )(opro_inputs)

            self.program = Program(
                inputs=opro_inputs,
                outputs=opro_outputs,
                name="opro",
                description="OPRO Program",
            )
        self.built = True

    async def optimize(self, trainable_variable, reward=None):
        """Perform a backprop/optimization on a single variable."""
        # Reward backpropagation
        predictions = trainable_variable.get("predictions")
        backpropagated_predictions = []
        backprop_pred_nb = 0
        for p in predictions:
            if p["reward"] is None:
                p["reward"] = reward
                backprop_pred_nb += 1
            backpropagated_predictions.append(p)
        if backprop_pred_nb > 0:
            trainable_variable.update({"predictions": backpropagated_predictions})
            # Get the k best predictions (sorted by reward)
            sorted_predictions = sorted(
                backpropagated_predictions,
                key=lambda x: x["reward"] if x["reward"] is not None else float("-inf"),
                reverse=True,
            )
            top_k_predictions = sorted_predictions[: self.k_best]
            if len(top_k_predictions) > self.k:
                selected_predictions = random.sample(top_k_predictions, self.k)
            else:
                selected_predictions = top_k_predictions
            # Get the k best instructions candidates (sorted by reward)
            sorted_instructions_candidates = sorted(
                trainable_variable.get("instructions_candidates"),
                key=lambda x: x["reward"] if x["reward"] is not None else float("-inf"),
                reverse=True,
            )
            top_k_instructions_candidates = sorted_instructions_candidates[: self.k_best]
            # Prepare inputs for OPRO
            inputs = OPROInputs(
                predictions=top_k_predictions,
                instructions_candidates=top_k_instructions_candidates,
            )
            new_instructions = await self.program(inputs)
            trainable_variable.update(
                {
                    "instructions": new_instructions.get_json(),
                    "examples": selected_predictions,
                }
            )

    async def finalize(self, trainable_variable):
        """Finalize the optimization of a single variable (cleanup/scaling etc.)."""
        trainable_variable.update(
            {
                "predictions": [],
                "instructions_candidates": []
            }
        )

    def get_config(self):
        config = {
            "k": self.k,
            "k_best": self.k_best,
            "name": self.name,
            "description": self.description,
        }
        language_model_config = {
            "language_model": serialization_lib.serialize_synalinks_object(
                self.language_model,
            )
        }
        program_config = {
            "program": serialization_lib.serialize_synalinks_object(
                self.program,
            )
        }
        return {**config, **language_model_config, **program_config}

    @classmethod
    def from_config(cls, config):
        language_model = serialization_lib.deserialize_synalinks_object(
            config.pop("language_model"),
        )
        program = serialization_lib.deserialize_synalinks_object(
            config.pop("program"),
        )
        return cls(language_model=language_model, program=program, **config)

finalize(trainable_variable) async

Finalize the optimization of a single variable (cleanup/scaling etc.).
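
As a plain-data illustration of this cleanup (the real object is a synalinks trainable variable, not a dict; the keys follow the fields set in `optimize()` above): the per-step accumulators are cleared, while the learned instructions and the selected few-shot examples are kept.

```python
# Simplified view of the variable's state after an optimization step.
state = {
    "instructions": {"instructions": ["Answer concisely."]},  # kept
    "examples": [{"outputs": "Paris", "reward": 1.0}],        # kept
    "predictions": [{"outputs": "Lyon", "reward": 0.0}],      # cleared below
    "instructions_candidates": [{"reward": 0.5}],             # cleared below
}

# finalize() resets only the accumulators used during optimization.
state.update({"predictions": [], "instructions_candidates": []})
assert state["predictions"] == [] and state["instructions_candidates"] == []
```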

Source code in synalinks/src/optimizers/few_shot_opro.py
async def finalize(self, trainable_variable):
    """Finalize the optimization of a single variable (cleanup/scaling etc.)."""
    trainable_variable.update(
        {
            "predictions": [],
            "instructions_candidates": []
        }
    )

optimize(trainable_variable, reward=None) async

Perform a backprop/optimization on a single variable.
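
Before any selection happens, the reward obtained for the current step is backpropagated to the predictions that were collected during the forward pass but not yet rewarded. A minimal plain-Python sketch of that step, with prediction records simplified to dictionaries:

```python
# Reward returned by the reward function for the current step.
reward = 0.75

# Predictions accumulated in the trainable variable; some already rewarded.
predictions = [
    {"outputs": "Paris", "reward": 1.0},  # rewarded in a previous step
    {"outputs": "Lyon", "reward": None},  # produced this step, not yet rewarded
]

# Assign the current reward to every prediction that has none yet.
backpropagated = []
for p in predictions:
    if p["reward"] is None:
        p["reward"] = reward
    backpropagated.append(p)

# Only if at least one prediction was updated does the optimizer go on to
# select few-shot examples and generate new instructions with OPRO.
print(backpropagated)
```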

Source code in synalinks/src/optimizers/few_shot_opro.py
async def optimize(self, trainable_variable, reward=None):
    """Perform a backprop/optimization on a single variable."""
    # Reward backpropagation
    predictions = trainable_variable.get("predictions")
    backpropagated_predictions = []
    backprop_pred_nb = 0
    for p in predictions:
        if p["reward"] is None:
            p["reward"] = reward
            backprop_pred_nb += 1
        backpropagated_predictions.append(p)
    if backprop_pred_nb > 0:
        trainable_variable.update({"predictions": backpropagated_predictions})
        # Get the k best predictions (sorted by reward)
        sorted_predictions = sorted(
            backpropagated_predictions,
            key=lambda x: x["reward"] if x["reward"] is not None else float("-inf"),
            reverse=True,
        )
        top_k_predictions = sorted_predictions[: self.k_best]
        if len(top_k_predictions) > self.k:
            selected_predictions = random.sample(top_k_predictions, self.k)
        else:
            selected_predictions = top_k_predictions
        # Get the k best instructions candidates (sorted by reward)
        sorted_instructions_candidates = sorted(
            trainable_variable.get("instructions_candidates"),
            key=lambda x: x["reward"] if x["reward"] is not None else float("-inf"),
            reverse=True,
        )
        top_k_instructions_candidates = sorted_instructions_candidates[: self.k_best]
        # Prepare inputs for OPRO
        inputs = OPROInputs(
            predictions=top_k_predictions,
            instructions_candidates=top_k_instructions_candidates,
        )
        new_instructions = await self.program(inputs)
        trainable_variable.update(
            {
                "instructions": new_instructions.get_json(),
                "examples": selected_predictions,
            }
        )