Knowledge Bases API

`KnowledgeBase`

Bases: SynalinksSaveable

A knowledge base for storing and retrieving structured data.

The KnowledgeBase provides a unified interface for storing structured data with support for full-text search and optional vector similarity search. It uses DuckDB as the underlying storage engine.

Basic Usage

import synalinks

# `id` is declared first: the first field is used as the primary key for upserts.
class Document(synalinks.DataModel):
    id: str
    title: str
    content: str

# Create a knowledge base without embeddings (full-text search only)
knowledge_base = synalinks.KnowledgeBase(
    uri="duckdb://my_database.db",
    data_models=[Document],
)

# NOTE: `update`, `get` and `fulltext_search` are coroutines, so the `await`
# calls below must run inside an async function.

# Store a document
doc = Document(id="1", title="Hello", content="Hello World!")
await knowledge_base.update(doc.to_json_data_model())

# Retrieve by ID
result = await knowledge_base.get("1", [Document.to_symbolic_data_model()])

# Full-text search
results = await knowledge_base.fulltext_search("Hello", k=10)

With Vector Similarity Search

# Passing an embedding model enables vector similarity search.
embedding_model = synalinks.EmbeddingModel(
    model="ollama/mxbai-embed-large"
)

knowledge_base = synalinks.KnowledgeBase(
    uri="duckdb://./my_database.db",
    data_models=[Document],
    embedding_model=embedding_model,
    metric="cosine",
)

# Hybrid search (combines full-text and vector similarity)
# `hybrid_search` is a coroutine: await it from inside an async function.
results = await knowledge_base.hybrid_search("semantic query", k=10)

Retrieving Table Definitions

# Get all symbolic data models (table definitions) from the database
symbolic_models = knowledge_base.get_symbolic_data_models()

for model in symbolic_models:
    print(model.get_schema())
    # {'title': 'Document', 'type': 'object', 'properties': {...}, ...}

Parameters:

Name	Type	Description	Default
`uri`	`str`	The database connection URI. Use "duckdb://path/to/db.db" for DuckDB. If not provided, uses an in-memory database.	`None`
`data_models`	`list`	Optional list of DataModel or SymbolicDataModel classes to create tables for.	`None`
`embedding_model`	`EmbeddingModel`	Optional embedding model for vector similarity search.	`None`
`metric`	`str`	The distance metric for vector search. Options: "cosine", "l2seq", "ip" (default: "cosine").	`'cosine'`
`wipe_on_start`	`bool`	Whether to clear the database on initialization (default: False).	`False`
`name`	`str`	Optional name for the knowledge base (used for serialization).	`None`

Source code in synalinks/src/knowledge_bases/knowledge_base.py

@synalinks_export("synalinks.KnowledgeBase")
class KnowledgeBase(SynalinksSaveable):
    """A knowledge base for storing and retrieving structured data.

    The KnowledgeBase provides a unified interface for storing structured data
    with support for full-text search and optional vector similarity search.
    It uses DuckDB as the underlying storage engine.

    ### Basic Usage

    ```python
    import synalinks

    class Document(synalinks.DataModel):
        id: str
        title: str
        content: str

    # Create a knowledge base without embeddings (full-text search only)
    knowledge_base = synalinks.KnowledgeBase(
        uri="duckdb://my_database.db",
        data_models=[Document],
    )

    # Store a document
    doc = Document(id="1", title="Hello", content="Hello World!")
    await knowledge_base.update(doc.to_json_data_model())

    # Retrieve by ID
    result = await knowledge_base.get("1", [Document.to_symbolic_data_model()])

    # Full-text search
    results = await knowledge_base.fulltext_search("Hello", k=10)
    ```

    ### With Vector Similarity Search

    ```python
    embedding_model = synalinks.EmbeddingModel(
        model="ollama/mxbai-embed-large"
    )

    knowledge_base = synalinks.KnowledgeBase(
        uri="duckdb://./my_database.db",
        data_models=[Document],
        embedding_model=embedding_model,
        metric="cosine",
    )

    # Hybrid search (combines full-text and vector similarity)
    results = await knowledge_base.hybrid_search("semantic query", k=10)
    ```

    ### Retrieving Table Definitions

    ```python
    # Get all symbolic data models (table definitions) from the database
    symbolic_models = knowledge_base.get_symbolic_data_models()

    for model in symbolic_models:
        print(model.get_schema())
        # {'title': 'Document', 'type': 'object', 'properties': {...}, ...}
    ```

    Args:
        uri (str): The database connection URI. Use "duckdb://path/to/db.db"
            for DuckDB. If not provided, uses an in-memory database.
        data_models (list): Optional list of DataModel or SymbolicDataModel
            classes to create tables for.
        embedding_model (EmbeddingModel): Optional embedding model for
            vector similarity search.
        metric (str): The distance metric for vector search.
            Options: "cosine", "l2seq", "ip" (default: "cosine").
        wipe_on_start (bool): Whether to clear the database on initialization
            (default: False).
        name (str): Optional name for the knowledge base (used for serialization).
    """

    def __init__(
        self,
        uri=None,
        data_models=None,
        embedding_model=None,
        metric="cosine",
        wipe_on_start=False,
        name=None,
    ):
        # Look up the adapter for this URI and instantiate it with the
        # constructor arguments exactly as received.
        # NOTE: the adapter gets `name` as passed (possibly None); the
        # auto-generated name below is only stored on the knowledge base.
        self.adapter = database_adapters.get(uri)(
            uri=uri,
            data_models=data_models,
            embedding_model=embedding_model,
            metric=metric,
            wipe_on_start=wipe_on_start,
            name=name,
        )
        # Keep the settings on the instance so `get_config()` can serialize them.
        self.uri = uri
        self.data_models = data_models or []
        self.embedding_model = _get_em(embedding_model)
        self.metric = metric
        self.wipe_on_start = wipe_on_start
        if not name:
            self.name = auto_name("knowledge_base")
        else:
            self.name = name

    async def update(
        self,
        data_model_or_data_models: Union[Any, List[Any]],
    ) -> Union[Any, List[Any]]:
        """Insert or update records in the knowledge base.

        Args:
            data_model_or_data_models (JsonDataModel | List[JsonDataModel]):
                A single JsonDataModel or a list of JsonDataModels to insert
                or update. Uses the first field as the primary key for upserts.

        Returns:
            The primary key value(s) of the inserted/updated records.
        """
        return await self.adapter.update(data_model_or_data_models)

    async def get(
        self,
        id_or_ids: Any,
        data_models: Optional[List[Any]] = None,
    ) -> Optional[Any]:
        """Retrieve a record by its primary key.

        Args:
            id_or_ids: The primary key value to look up.
            data_models: Optional list of SymbolicDataModels to search in.
                If not provided, searches all tables.

        Returns:
            JsonDataModel if found, None otherwise.
        """
        return await self.adapter.get(id_or_ids, data_models=data_models)

    async def getall(
        self,
        data_model: Any,
        limit: int = 50,
        offset: int = 0,
    ) -> List[Any]:
        """Retrieve all records from a table with pagination.

        Args:
            data_model: The SymbolicDataModel representing the table to query.
            limit: Maximum number of records to return (default: 50).
            offset: Number of records to skip (default: 0).

        Returns:
            List of JsonDataModels.
        """
        return await self.adapter.getall(data_model, limit=limit, offset=offset)

    async def query(
        self,
        query: str,
        params: Optional[Dict[str, Any]] = None,
        **kwargs,
    ) -> List[Dict[str, Any]]:
        """Execute a raw SQL query against the knowledge base.

        Args:
            query (str): The SQL query to execute.
            params (dict): Optional dictionary of parameters for
                parameterized queries.
            **kwargs (Any): Additional options (e.g., read_only=True/False).

        Returns:
            List of result dictionaries.
        """
        return await self.adapter.query(query, params=params, **kwargs)

    async def similarity_search(
        self,
        text_or_texts: Union[str, List[str]],
        data_models: Optional[List[Any]] = None,
        k: int = 10,
        threshold: Optional[float] = None,
    ) -> List[Dict[str, Any]]:
        """Perform vector similarity search using embeddings.

        Requires an embedding_model to be configured.

        Args:
            text_or_texts: Query text or list of query texts.
            data_models: Optional list of SymbolicDataModels to search in.
            k: Maximum number of results to return (default: 10).
            threshold: Optional maximum distance threshold for filtering.

        Returns:
            List of matching records with similarity scores.
        """
        return await self.adapter.similarity_search(
            text_or_texts,
            data_models=data_models,
            k=k,
            threshold=threshold,
        )

    async def fulltext_search(
        self,
        text_or_texts: Union[str, List[str]],
        data_models: Optional[List[Any]] = None,
        k: int = 10,
        threshold: Optional[float] = None,
    ) -> List[Dict[str, Any]]:
        """Perform full-text search using BM25 ranking.

        Searches text fields (description, text, content, message, name,
        query, question) for matching documents.

        Args:
            text_or_texts: Query text or list of query texts.
            data_models: Optional list of SymbolicDataModels to search in.
            k: Maximum number of results to return (default: 10).
            threshold: Optional minimum BM25 score threshold.

        Returns:
            List of matching records with relevance scores.
        """
        return await self.adapter.fulltext_search(
            text_or_texts,
            data_models=data_models,
            k=k,
            threshold=threshold,
        )

    async def hybrid_search(
        self,
        text_or_texts: Union[str, List[str]],
        data_models: Optional[List[Any]] = None,
        k: int = 10,
        k_rank: int = 60,
        similarity_threshold: Optional[float] = None,
        fulltext_threshold: Optional[float] = None,
    ) -> List[Dict[str, Any]]:
        """Perform hybrid search combining vector similarity and full-text.

        Uses Reciprocal Rank Fusion (RRF) to combine results from both
        similarity search and full-text search. If no embedding model is
        configured, falls back to full-text search alone.

        Args:
            text_or_texts: Query text or list of query texts.
            data_models: Optional list of SymbolicDataModels to search in.
            k: Maximum number of results to return (default: 10).
            k_rank: RRF smoothing constant. Lower values emphasize top ranks
                more strongly (default: 60).
            similarity_threshold: Optional threshold for vector similarity.
            fulltext_threshold: Optional threshold for full-text relevance.

        Returns:
            List of matching records with combined scores.
        """
        return await self.adapter.hybrid_search(
            text_or_texts,
            data_models=data_models,
            k=k,
            k_rank=k_rank,
            similarity_threshold=similarity_threshold,
            fulltext_threshold=fulltext_threshold,
        )

    def get_symbolic_data_models(self) -> List[Any]:
        """Retrieve all symbolic data models (table definitions) from the database.

        Returns a list of SymbolicDataModel objects representing each table
        in the database. This is useful for introspecting the database schema
        or for passing to search methods to limit the search scope.

        Returns:
            list: List of symbolic data models representing the database tables.

        Example:
            ```python
            symbolic_models = knowledge_base.get_symbolic_data_models()
            for model in symbolic_models:
                schema = model.get_schema()
                print(f"Table: {schema['title']}")
                print(f"Fields: {list(schema['properties'].keys())}")
            ```
        """
        return self.adapter.get_symbolic_data_models()

    def get_config(self):
        """Return the serializable configuration of the knowledge base.

        Returns:
            dict: A dictionary with the serialized `data_models`, the
                serialized `embedding_model` (only when one is set), and the
                `uri`, `name`, `metric` and `wipe_on_start` settings.
        """
        serialized_data_models = []
        for i, data_model in enumerate(self.data_models):
            if not is_symbolic_data_model(data_model):
                # Convert to a symbolic data model first, named after this
                # knowledge base; entries after the first also carry their
                # index in the name.
                data_model = data_model.to_symbolic_data_model(
                    name="data_model" + (f"_{i}_" if i > 0 else "_") + self.name
                )
            serialized_data_models.append(
                serialization_lib.serialize_synalinks_object(data_model)
            )

        config = {"data_models": serialized_data_models}
        if self.embedding_model:
            config["embedding_model"] = serialization_lib.serialize_synalinks_object(
                self.embedding_model,
            )
        config.update(
            {
                "uri": self.uri,
                "name": self.name,
                "metric": self.metric,
                "wipe_on_start": self.wipe_on_start,
            }
        )
        return config

    @classmethod
    def from_config(cls, config):
        """Create a knowledge base from a dictionary produced by `get_config()`.

        Args:
            config (dict): The configuration dictionary. It is copied before
                use, so the caller's dictionary is left unmodified.

        Returns:
            A new instance of this class built from the configuration.
        """
        # Work on a shallow copy: the `pop` calls below must not strip keys
        # from the dictionary owned by the caller.
        config = dict(config)
        data_models_config = config.pop("data_models", [])
        data_models = [
            serialization_lib.deserialize_synalinks_object(data_model)
            for data_model in data_models_config
        ]
        embedding_model = None
        if "embedding_model" in config:
            embedding_model = serialization_lib.deserialize_synalinks_object(
                config.pop("embedding_model"),
            )
        return cls(
            data_models=data_models,
            embedding_model=embedding_model,
            **config,
        )

`fulltext_search(text_or_texts, data_models=None, k=10, threshold=None)` `async`

Perform full-text search using BM25 ranking.

Searches text fields (description, text, content, message, name, query, question) for matching documents.

Parameters:

Name	Type	Description	Default
`text_or_texts`	`Union[str, List[str]]`	Query text or list of query texts.	required
`data_models`	`Optional[List[Any]]`	Optional list of SymbolicDataModels to search in.	`None`
`k`	`int`	Maximum number of results to return (default: 10).	`10`
`threshold`	`Optional[float]`	Optional minimum BM25 score threshold.	`None`

Returns:

Type	Description
`List[Dict[str, Any]]`	List of matching records with relevance scores.

Source code in synalinks/src/knowledge_bases/knowledge_base.py

async def fulltext_search(
    self,
    text_or_texts: Union[str, List[str]],
    data_models: Optional[List[Any]] = None,
    k: int = 10,
    threshold: Optional[float] = None,
) -> List[Dict[str, Any]]:
    """Run a BM25-ranked full-text search.

    Matching is done against text fields (description, text, content,
    message, name, query, question).

    Args:
        text_or_texts: A single query string or a list of query strings.
        data_models: Optional list of SymbolicDataModels restricting the search.
        k: Upper bound on the number of results (default: 10).
        threshold: Optional minimum BM25 score a result must reach.

    Returns:
        List of matching records with relevance scores.
    """
    # The call is forwarded unchanged to the storage adapter.
    search_options = {
        "data_models": data_models,
        "k": k,
        "threshold": threshold,
    }
    return await self.adapter.fulltext_search(text_or_texts, **search_options)

`get(id_or_ids, data_models=None)` `async`

Retrieve a record by its primary key.

Parameters:

Name	Type	Description	Default
`id_or_ids`	`Any`	The primary key value to look up.	required
`data_models`	`Optional[List[Any]]`	Optional list of SymbolicDataModels to search in. If not provided, searches all tables.	`None`

Returns:

Type	Description
`Optional[Any]`	JsonDataModel if found, None otherwise.

Source code in synalinks/src/knowledge_bases/knowledge_base.py

async def get(
    self,
    id_or_ids: Any,
    data_models: Optional[List[Any]] = None,
) -> Optional[Any]:
    """Look up a record by primary key.

    Args:
        id_or_ids: The primary key value to look up.
        data_models: Optional list of SymbolicDataModels to search in.
            When omitted, all tables are searched.

    Returns:
        The JsonDataModel if one is found, otherwise None.
    """
    # The lookup is forwarded unchanged to the storage adapter.
    adapter = self.adapter
    record = await adapter.get(id_or_ids, data_models=data_models)
    return record

`get_symbolic_data_models()`

Retrieve all symbolic data models (table definitions) from the database.

Returns a list of SymbolicDataModel objects representing each table in the database. This is useful for introspecting the database schema or for passing to search methods to limit the search scope.

Returns:

Name	Type	Description
`list`	`List[Any]`	List of symbolic data models representing the database tables.

Example

symbolic_models = knowledge_base.get_symbolic_data_models()
for model in symbolic_models:
    schema = model.get_schema()
    print(f"Table: {schema['title']}")
    print(f"Fields: {list(schema['properties'].keys())}")

Source code in synalinks/src/knowledge_bases/knowledge_base.py

def get_symbolic_data_models(self) -> List[Any]:
    """Return the symbolic data models (table definitions) of the database.

    Each returned SymbolicDataModel represents one table in the database.
    Use the result to introspect the database schema, or pass it to the
    search methods to limit the search scope.

    Returns:
        list: List of symbolic data models representing the database tables.

    Example:
        ```python
        symbolic_models = knowledge_base.get_symbolic_data_models()
        for model in symbolic_models:
            schema = model.get_schema()
            print(f"Table: {schema['title']}")
            print(f"Fields: {list(schema['properties'].keys())}")
        ```
    """
    # The table definitions are provided by the storage adapter.
    adapter = self.adapter
    return adapter.get_symbolic_data_models()

`getall(data_model, limit=50, offset=0)` `async`

Retrieve all records from a table with pagination.

Parameters:

Name	Type	Description	Default
`data_model`	`Any`	The SymbolicDataModel representing the table to query.	required
`limit`	`int`	Maximum number of records to return (default: 50).	`50`
`offset`	`int`	Number of records to skip (default: 0).	`0`

Returns:

Type	Description
`List[Any]`	List of JsonDataModels.

Source code in synalinks/src/knowledge_bases/knowledge_base.py

async def getall(
    self,
    data_model: Any,
    limit: int = 50,
    offset: int = 0,
) -> List[Any]:
    """Fetch the records of one table, one page at a time.

    Args:
        data_model: The SymbolicDataModel representing the table to query.
        limit: Maximum number of records to return (default: 50).
        offset: Number of records to skip (default: 0).

    Returns:
        List of JsonDataModels.
    """
    # Pagination settings are forwarded unchanged to the storage adapter.
    page = {"limit": limit, "offset": offset}
    return await self.adapter.getall(data_model, **page)

`hybrid_search(text_or_texts, data_models=None, k=10, k_rank=60, similarity_threshold=None, fulltext_threshold=None)` `async`

Perform hybrid search combining vector similarity and full-text.

Uses Reciprocal Rank Fusion (RRF) to combine results from both similarity search and full-text search. If no embedding model is configured, falls back to full-text search alone.

Parameters:

Name	Type	Description	Default
`text_or_texts`	`Union[str, List[str]]`	Query text or list of query texts.	required
`data_models`	`Optional[List[Any]]`	Optional list of SymbolicDataModels to search in.	`None`
`k`	`int`	Maximum number of results to return (default: 10).	`10`
`k_rank`	`int`	RRF smoothing constant. Lower values emphasize top ranks more strongly (default: 60).	`60`
`similarity_threshold`	`Optional[float]`	Optional threshold for vector similarity.	`None`
`fulltext_threshold`	`Optional[float]`	Optional threshold for full-text relevance.	`None`

Returns:

Type	Description
`List[Dict[str, Any]]`	List of matching records with combined scores.

Source code in synalinks/src/knowledge_bases/knowledge_base.py

async def hybrid_search(
    self,
    text_or_texts: Union[str, List[str]],
    data_models: Optional[List[Any]] = None,
    k: int = 10,
    k_rank: int = 60,
    similarity_threshold: Optional[float] = None,
    fulltext_threshold: Optional[float] = None,
) -> List[Dict[str, Any]]:
    """Run a hybrid search mixing vector similarity and full-text results.

    Results of the similarity search and of the full-text search are
    combined with Reciprocal Rank Fusion (RRF). If no embedding model is
    configured, the search falls back to full-text search alone.

    Args:
        text_or_texts: A single query string or a list of query strings.
        data_models: Optional list of SymbolicDataModels restricting the search.
        k: Upper bound on the number of results (default: 10).
        k_rank: RRF smoothing constant. Lower values emphasize top ranks
            more strongly (default: 60).
        similarity_threshold: Optional threshold for vector similarity.
        fulltext_threshold: Optional threshold for full-text relevance.

    Returns:
        List of matching records with combined scores.
    """
    # The call is forwarded unchanged to the storage adapter.
    fusion_options = {
        "data_models": data_models,
        "k": k,
        "k_rank": k_rank,
        "similarity_threshold": similarity_threshold,
        "fulltext_threshold": fulltext_threshold,
    }
    return await self.adapter.hybrid_search(text_or_texts, **fusion_options)

`query(query, params=None, **kwargs)` `async`

Execute a raw SQL query against the knowledge base.

Parameters:

Name	Type	Description	Default
`query`	`str`	The SQL query to execute.	required
`params`	`dict`	Optional dictionary of parameters for parameterized queries.	`None`
`**kwargs`	`Any`	Additional options (e.g., read_only=True/False).	`{}`

Returns:

Type	Description
`List[Dict[str, Any]]`	List of result dictionaries.

Source code in synalinks/src/knowledge_bases/knowledge_base.py

async def query(
    self,
    query: str,
    params: Optional[Dict[str, Any]] = None,
    **kwargs,
) -> List[Dict[str, Any]]:
    """Run a raw SQL query against the knowledge base.

    Args:
        query (str): The SQL query to execute.
        params (dict): Optional dictionary of parameters for
            parameterized queries.
        **kwargs (Any): Additional options (e.g., read_only=True/False).

    Returns:
        List of result dictionaries.
    """
    # Query text, parameters and extra options go to the adapter unchanged.
    run_query = self.adapter.query
    rows = await run_query(query, params=params, **kwargs)
    return rows

`similarity_search(text_or_texts, data_models=None, k=10, threshold=None)` `async`

Perform vector similarity search using embeddings.

Requires an embedding_model to be configured.

Parameters:

Name	Type	Description	Default
`text_or_texts`	`Union[str, List[str]]`	Query text or list of query texts.	required
`data_models`	`Optional[List[Any]]`	Optional list of SymbolicDataModels to search in.	`None`
`k`	`int`	Maximum number of results to return (default: 10).	`10`
`threshold`	`Optional[float]`	Optional maximum distance threshold for filtering.	`None`

Returns:

Type	Description
`List[Dict[str, Any]]`	List of matching records with similarity scores.

Source code in synalinks/src/knowledge_bases/knowledge_base.py

async def similarity_search(
    self,
    text_or_texts: Union[str, List[str]],
    data_models: Optional[List[Any]] = None,
    k: int = 10,
    threshold: Optional[float] = None,
) -> List[Dict[str, Any]]:
    """Run an embedding-based vector similarity search.

    An embedding_model must be configured for this search.

    Args:
        text_or_texts: A single query string or a list of query strings.
        data_models: Optional list of SymbolicDataModels restricting the search.
        k: Upper bound on the number of results (default: 10).
        threshold: Optional maximum distance threshold for filtering.

    Returns:
        List of matching records with similarity scores.
    """
    # The call is forwarded unchanged to the storage adapter.
    search_options = {
        "data_models": data_models,
        "k": k,
        "threshold": threshold,
    }
    return await self.adapter.similarity_search(text_or_texts, **search_options)

`update(data_model_or_data_models)` `async`

Insert or update records in the knowledge base.

Parameters:

Name	Type	Description	Default
`data_model_or_data_models`	`JsonDataModel \| List[JsonDataModel]`	A single JsonDataModel or a list of JsonDataModels to insert or update. Uses the first field as the primary key for upserts.	required

Returns:

Type	Description
`Union[Any, List[Any]]`	The primary key value(s) of the inserted/updated records.

Source code in synalinks/src/knowledge_bases/knowledge_base.py

async def update(
    self,
    data_model_or_data_models: Union[Any, List[Any]],
) -> Union[Any, List[Any]]:
    """Upsert one or several records into the knowledge base.

    Args:
        data_model_or_data_models (JsonDataModel | List[JsonDataModel]):
            A single JsonDataModel or a list of JsonDataModels to insert
            or update. The first field is used as the primary key for
            upserts.

    Returns:
        The primary key value(s) of the inserted/updated records.
    """
    # The write is forwarded unchanged to the storage adapter.
    adapter = self.adapter
    primary_keys = await adapter.update(data_model_or_data_models)
    return primary_keys

Knowledge Bases API

KnowledgeBase

Basic Usage

With Vector Similarity Search

Retrieving Table Definitions

fulltext_search(text_or_texts, data_models=None, k=10, threshold=None) async

get(id_or_ids, data_models=None) async

get_symbolic_data_models()

getall(data_model, limit=50, offset=0) async

hybrid_search(text_or_texts, data_models=None, k=10, k_rank=60, similarity_threshold=None, fulltext_threshold=None) async

query(query, params=None, **kwargs) async

similarity_search(text_or_texts, data_models=None, k=10, threshold=None) async

update(data_model_or_data_models) async

`KnowledgeBase`

`fulltext_search(text_or_texts, data_models=None, k=10, threshold=None)` `async`

`get(id_or_ids, data_models=None)` `async`

`get_symbolic_data_models()`

`getall(data_model, limit=50, offset=0)` `async`

`hybrid_search(text_or_texts, data_models=None, k=10, k_rank=60, similarity_threshold=None, fulltext_threshold=None)` `async`

`query(query, params=None, **kwargs)` `async`

`similarity_search(text_or_texts, data_models=None, k=10, threshold=None)` `async`

`update(data_model_or_data_models)` `async`