Knowledge Bases API

`KnowledgeBase`

Bases: SynalinksSaveable

A generic graph knowledge base.

Using Neo4j graph database

import synalinks
import os

class Document(synalinks.Entity):
    title: str
    content: str

class Chunk(synalinks.Entity):
    content: str

class IsPartOf(synalinks.Relation):
    source: Chunk
    target: Document

embedding_model = synalinks.EmbeddingModel(
    model="ollama/mxbai-embed-large"
)

os.environ["NEO4J_DATABASE"] = "your-neo4j-db" # (Default to "neo4j")
os.environ["NEO4J_USERNAME"] = "your-neo4j-username" # (Default to "neo4j")
os.environ["NEO4J_PASSWORD"] = "your-neo4j-password" # (Default to "neo4j")

knowledge_base = synalinks.KnowledgeBase(
    uri="neo4j://localhost:7687",
    entity_models=[Document, Chunk],
    relation_models=[IsPartOf],
    embedding_model=embedding_model,
    metric="cosine",
    wipe_on_start=False,
)

Learn more about Neo4J in their documentation here

Using MemGraph graph database

os.environ["MEMGRAPH_DATABASE"] = "your-memgraph-db" # (Default to "memgraph")
os.environ["MEMGRAPH_USERNAME"] = "your-memgraph-username" # (Default to "memgraph")
os.environ["MEMGRAPH_PASSWORD"] = "your-memgraph-password" # (Default to "memgraph")

knowledge_base = synalinks.KnowledgeBase(
    uri="memgraph://localhost:7687",
    entity_models=[Document, Chunk],
    relation_models=[IsPartOf],
    embedding_model=embedding_model,
    metric="cosine",
    wipe_on_start=False,
)

Learn more about MemGraph in their documentation here

Note: Obviously, use an .env file and .gitignore to avoid putting your username and password in the code or a config file that can lead to leackage when pushing it into repositories.

Parameters:

Name	Type	Description	Default
`uri`	`str`	The index name/url of the database.	`None`
`entity_models`	`list`	The entity models being a list of `Entity`.	`None`
`relation_models`	`list`	The relation models being a list of `Relation`.	`None`
`embedding_model`	`EmbeddingModel`	The embedding model.	`None`
`metric`	`str`	The metric to use for the vector index (`cosine` or `euclidean`).	`'cosine'`
`wipe_on_start`	`bool`	Wether or not to wipe the graph database at start (Default to False).	`False`

Source code in synalinks/src/knowledge_bases/knowledge_base.py

@synalinks_export("synalinks.KnowledgeBase")
class KnowledgeBase(SynalinksSaveable):
    """A generic graph knowledge base.

    ### Using Neo4j graph database

    ```python
    import synalinks
    import os

    class Document(synalinks.Entity):
        title: str
        content: str

    class Chunk(synalinks.Entity):
        content: str

    class IsPartOf(synalinks.Relation):
        source: Chunk
        target: Document

    embedding_model = synalinks.EmbeddingModel(
        model="ollama/mxbai-embed-large"
    )

    os.environ["NEO4J_DATABASE"] = "your-neo4j-db" # (Default to "neo4j")
    os.environ["NEO4J_USERNAME"] = "your-neo4j-username" # (Default to "neo4j")
    os.environ["NEO4J_PASSWORD"] = "your-neo4j-password" # (Default to "neo4j")

    knowledge_base = synalinks.KnowledgeBase(
        uri="neo4j://localhost:7687",
        entity_models=[Document, Chunk],
        relation_models=[IsPartOf],
        embedding_model=embedding_model,
        metric="cosine",
        wipe_on_start=False,
    )
    ```

    Learn more about Neo4J in their documentation **[here](https://neo4j.com/docs/)**

    ### Using MemGraph graph database

    ```python
    os.environ["MEMGRAPH_DATABASE"] = "your-memgraph-db" # (Default to "memgraph")
    os.environ["MEMGRAPH_USERNAME"] = "your-memgraph-username" # (Default to "memgraph")
    os.environ["MEMGRAPH_PASSWORD"] = "your-memgraph-password" # (Default to "memgraph")

    knowledge_base = synalinks.KnowledgeBase(
        uri="memgraph://localhost:7687",
        entity_models=[Document, Chunk],
        relation_models=[IsPartOf],
        embedding_model=embedding_model,
        metric="cosine",
        wipe_on_start=False,
    )
    ```

    Learn more about MemGraph in their documentation **[here](https://memgraph.com/docs)**

    **Note**: Obviously, use an `.env` file and `.gitignore` to avoid putting
    your username and password in the code or a config file that can lead to
    leackage when pushing it into repositories.

    Args:
        uri (str): The index name/url of the database.
        entity_models (list): The entity models being a list of `Entity`.
        relation_models (list): The relation models being a list of `Relation`.
        embedding_model (EmbeddingModel): The embedding model.
        metric (str): The metric to use for the vector index (`cosine` or `euclidean`).
        wipe_on_start (bool): Wether or not to wipe the graph database at start
            (Default to False).
    """

    def __init__(
        self,
        uri=None,
        entity_models=None,
        relation_models=None,
        embedding_model=None,
        metric="cosine",
        wipe_on_start=False,
    ):
        self.adapter = database_adapters.get(uri)(
            uri=uri,
            entity_models=entity_models,
            relation_models=relation_models,
            embedding_model=embedding_model,
            metric=metric,
            wipe_on_start=wipe_on_start,
        )
        self.uri = uri
        self.entity_models = entity_models
        self.relation_models = relation_models
        self.embedding_model = embedding_model
        self.metric = metric
        self.wipe_on_start = wipe_on_start

    async def update(
        self,
        data_model,
        threshold=0.8,
    ):
        """Update the knowledge base with new data.

        Adds or updates entities and relationships in the knowledge graph based on
        the provided data model. Perform alignment operations to
        merge similar entities.

        Args:
            data_model (JsonDataModel | DataModel): The data model containing entities
                and relations to be added or updated in the knowledge base.
                Should conform to the entity or relation models defined during
                initialization.
            threshold (float): Similarity threshold for entity alignment.
                Entities with similarity above this threshold will be merged.
                Should be between 0.0 and 1.0 (Defaults to 0.8).
        """
        maybe_initialize_telemetry()
        return await self.adapter.update(data_model)

    async def query(self, query: str, params: Dict[str, Any] = None, **kwargs):
        """Execute a query against the knowledge base.

        Args:
            query (str): The Cypher query to execute. The format depends on the
                underlying database adapter (e.g., Cypher for Neo4j).

        Returns:
            (GenericResult): the query results
        """
        maybe_initialize_telemetry()
        return await self.adapter.query(query, params=params, **kwargs)

    async def similarity_search(
        self,
        similarity_search,
        k=10,
        threshold=0.8,
    ):
        """Perform similarity search to find entities similar to the given text.

        Uses vector embeddings to find entities in the knowledge base that are
        semantically similar to the provided text query.

        Args:
            similarity_search (JsonDataModel): The `SimilaritySearch` data model.
            k (int): Maximum number of similar entities to return.
                Defaults to 10.
            threshold (float): Minimum similarity score for results.
                Entities with similarity below this threshold are excluded.
                Should be between 0.0 and 1.0 (Defaults to 0.8).
        """
        maybe_initialize_telemetry()
        return await self.adapter.similarity_search(
            similarity_search,
            k=k,
            threshold=threshold,
        )

    async def triplet_search(
        self,
        triplet_search,
        k=10,
        threshold=0.8,
    ):
        """Search for triplets in the knowledge graph.

        Finds relationship triplets in the knowledge base that match or are similar
        to the provided triplet pattern.

        Args:
            triplet_search (JsonDataModel): The `TripletSearch` data model.
            k (int): Maximum number of matching triplets to return.
                (Defaults to 10).
            threshold (float, optional): Minimum similarity score for triplet matches.
                Triplets with similarity below this threshold are excluded.
                Should be between 0.0 and 1.0. (Defaults to 0.8).
        """
        maybe_initialize_telemetry()
        return await self.adapter.triplet_search(
            triplet_search,
            k=k,
            threshold=threshold,
        )

    def get_config(self):
        config = {
            "uri": self.uri,
            "metric": self.metric,
            "wipe_on_start": self.wipe_on_start,
        }
        entity_models_config = {
            "entity_models": [
                (
                    serialization_lib.serialize_synalinks_object(
                        entity_model.to_symbolic_data_model()
                    )
                    if not is_symbolic_data_model(entity_model)
                    else serialization_lib.serialize_synalinks_object(entity_model)
                )
                for entity_model in self.entity_models
            ]
        }
        relation_models_config = {
            "relation_models": [
                (
                    serialization_lib.serialize_synalinks_object(
                        relation_model.to_symbolic_data_model()
                    )
                    if not is_symbolic_data_model(relation_model)
                    else serialization_lib.serialize_synalinks_object(relation_model)
                )
                for relation_model in self.relation_models
            ]
        }
        embedding_model_config = {
            "embedding_model": serialization_lib.serialize_synalinks_object(
                self.embedding_model,
            )
        }
        return {
            **entity_models_config,
            **relation_models_config,
            **embedding_model_config,
            **config,
        }

    @classmethod
    def from_config(cls, config):
        entity_models_config = config.pop("entity_models")
        entity_models = [
            serialization_lib.deserialize_synalinks_object(entity_model)
            for entity_model in entity_models_config
        ]
        relation_models_config = config.pop("relation_models")
        relation_models = [
            serialization_lib.deserialize_synalinks_object(relation_model)
            for relation_model in relation_models_config
        ]
        embedding_model = serialization_lib.deserialize_synalinks_object(
            config.pop("embedding_model"),
        )
        return cls(
            entity_models=entity_models,
            relation_models=relation_models,
            embedding_model=embedding_model,
            **config,
        )

`query(query, params=None, **kwargs)` `async`

Execute a query against the knowledge base.

Parameters:

Name	Type	Description	Default
`query`	`str`	The Cypher query to execute. The format depends on the underlying database adapter (e.g., Cypher for Neo4j).	required

Returns:

Type	Description
`GenericResult`	the query results

Source code in synalinks/src/knowledge_bases/knowledge_base.py

async def query(self, query: str, params: Dict[str, Any] = None, **kwargs):
    """Execute a query against the knowledge base.

    Args:
        query (str): The Cypher query to execute. The format depends on the
            underlying database adapter (e.g., Cypher for Neo4j).

    Returns:
        (GenericResult): the query results
    """
    maybe_initialize_telemetry()
    return await self.adapter.query(query, params=params, **kwargs)

`similarity_search(similarity_search, k=10, threshold=0.8)` `async`

Perform similarity search to find entities similar to the given text.

Uses vector embeddings to find entities in the knowledge base that are semantically similar to the provided text query.

Parameters:

Name	Type	Description	Default
`similarity_search`	`JsonDataModel`	The `SimilaritySearch` data model.	required
`k`	`int`	Maximum number of similar entities to return. Defaults to 10.	`10`
`threshold`	`float`	Minimum similarity score for results. Entities with similarity below this threshold are excluded. Should be between 0.0 and 1.0 (Defaults to 0.8).	`0.8`

Source code in synalinks/src/knowledge_bases/knowledge_base.py

async def similarity_search(
    self,
    similarity_search,
    k=10,
    threshold=0.8,
):
    """Perform similarity search to find entities similar to the given text.

    Uses vector embeddings to find entities in the knowledge base that are
    semantically similar to the provided text query.

    Args:
        similarity_search (JsonDataModel): The `SimilaritySearch` data model.
        k (int): Maximum number of similar entities to return.
            Defaults to 10.
        threshold (float): Minimum similarity score for results.
            Entities with similarity below this threshold are excluded.
            Should be between 0.0 and 1.0 (Defaults to 0.8).
    """
    maybe_initialize_telemetry()
    return await self.adapter.similarity_search(
        similarity_search,
        k=k,
        threshold=threshold,
    )

`triplet_search(triplet_search, k=10, threshold=0.8)` `async`

Search for triplets in the knowledge graph.

Finds relationship triplets in the knowledge base that match or are similar to the provided triplet pattern.

Parameters:

Name	Type	Description	Default
`triplet_search`	`JsonDataModel`	The `TripletSearch` data model.	required
`k`	`int`	Maximum number of matching triplets to return. (Defaults to 10).	`10`
`threshold`	`float`	Minimum similarity score for triplet matches. Triplets with similarity below this threshold are excluded. Should be between 0.0 and 1.0. (Defaults to 0.8).	`0.8`

Source code in synalinks/src/knowledge_bases/knowledge_base.py

async def triplet_search(
    self,
    triplet_search,
    k=10,
    threshold=0.8,
):
    """Search for triplets in the knowledge graph.

    Finds relationship triplets in the knowledge base that match or are similar
    to the provided triplet pattern.

    Args:
        triplet_search (JsonDataModel): The `TripletSearch` data model.
        k (int): Maximum number of matching triplets to return.
            (Defaults to 10).
        threshold (float, optional): Minimum similarity score for triplet matches.
            Triplets with similarity below this threshold are excluded.
            Should be between 0.0 and 1.0. (Defaults to 0.8).
    """
    maybe_initialize_telemetry()
    return await self.adapter.triplet_search(
        triplet_search,
        k=k,
        threshold=threshold,
    )

`update(data_model, threshold=0.8)` `async`

Update the knowledge base with new data.

Adds or updates entities and relationships in the knowledge graph based on the provided data model. Perform alignment operations to merge similar entities.

Parameters:

Name	Type	Description	Default
`data_model`	`JsonDataModel \| DataModel`	The data model containing entities and relations to be added or updated in the knowledge base. Should conform to the entity or relation models defined during initialization.	required
`threshold`	`float`	Similarity threshold for entity alignment. Entities with similarity above this threshold will be merged. Should be between 0.0 and 1.0 (Defaults to 0.8).	`0.8`

Source code in synalinks/src/knowledge_bases/knowledge_base.py

async def update(
    self,
    data_model,
    threshold=0.8,
):
    """Update the knowledge base with new data.

    Adds or updates entities and relationships in the knowledge graph based on
    the provided data model. Perform alignment operations to
    merge similar entities.

    Args:
        data_model (JsonDataModel | DataModel): The data model containing entities
            and relations to be added or updated in the knowledge base.
            Should conform to the entity or relation models defined during
            initialization.
        threshold (float): Similarity threshold for entity alignment.
            Entities with similarity above this threshold will be merged.
            Should be between 0.0 and 1.0 (Defaults to 0.8).
    """
    maybe_initialize_telemetry()
    return await self.adapter.update(data_model)

Knowledge Bases API

KnowledgeBase

Using Neo4j graph database

Using MemGraph graph database

query(query, params=None, **kwargs) async

similarity_search(similarity_search, k=10, threshold=0.8) async

triplet_search(triplet_search, k=10, threshold=0.8) async

update(data_model, threshold=0.8) async

`KnowledgeBase`

`query(query, params=None, **kwargs)` `async`

`similarity_search(similarity_search, k=10, threshold=0.8)` `async`

`triplet_search(triplet_search, k=10, threshold=0.8)` `async`

`update(data_model, threshold=0.8)` `async`