Skip to content

NLP utilities

add_suffix(property_key, suffix)

Add a suffix to a property key.

Parameters:

Name Type Description Default
property_key str

The property key to process.

required
suffix int

The suffix to add.

required

Returns:

Type Description
str

The property key with the suffix added.

Source code in synalinks/src/utils/nlp_utils.py
def add_suffix(property_key, suffix):
    """
    Append a suffix to a property key, separated by an underscore.

    Args:
        property_key (str): The key that receives the suffix.
        suffix (int): The value placed after the underscore.

    Returns:
        (str): The key, an underscore, and then the suffix.
    """
    return "{}_{}".format(property_key, suffix)

is_plural(property_key)

Check if the last word of a property key is in plural form.

Parameters:

Name Type Description Default
property_key str

The property key to check.

required

Returns:

Type Description
bool

True if the last word is plural, False otherwise.

Source code in synalinks/src/utils/nlp_utils.py
def is_plural(property_key):
    """
    Tell whether the final word of a property key is plural.

    The key is split on underscores and its last segment is passed to
    `to_singular`; the word counts as plural when the result differs
    from the segment itself.

    Args:
        property_key (str): The property key to inspect.

    Returns:
        (bool): True when singularizing the last word changes it,
            False otherwise.
    """
    # `split` always yields at least one segment, so the last segment is
    # also the only one when the key contains no underscore.
    last_word = property_key.split("_")[-1]
    return to_singular(last_word) != last_word

normalize_and_tokenize(text)

Normalize the text and tokenize it into words.

Parameters:

Name Type Description Default
text str

The text to process.

required

Returns:

Type Description
list

A list of normalized words.

Source code in synalinks/src/utils/nlp_utils.py
def normalize_and_tokenize(text):
    """
    Lowercase the text, drop articles and punctuation, and split into words.

    Articles are removed before punctuation here.

    Args:
        text (str): The text to process.

    Returns:
        (list): The whitespace-separated words of the normalized text.
    """
    lowered = text.lower()
    cleaned = remove_punctuation(remove_articles(lowered))
    return cleaned.split()

normalize_text(text)

Normalize the text by stripping surrounding whitespace, converting to lowercase, removing punctuation, and then removing articles.

Parameters:

Name Type Description Default
text str

The text to normalize.

required

Returns:

Type Description
str

The normalized text.

Source code in synalinks/src/utils/nlp_utils.py
def normalize_text(text):
    """
    Normalize text for comparison.

    The text is stripped of surrounding whitespace and lowercased, then
    punctuation is removed, and finally articles are removed.

    Args:
        text (str): The text to normalize.

    Returns:
        (str): The normalized text.
    """
    lowered = text.strip().lower()
    without_punctuation = remove_punctuation(lowered)
    return remove_articles(without_punctuation)

remove_articles(text)

Remove common English articles from the text.

Parameters:

Name Type Description Default
text str

The text to process.

required

Returns:

Type Description
str

The text with articles removed.

Source code in synalinks/src/utils/nlp_utils.py
def remove_articles(text):
    """
    Strip articles from the text and tidy the remaining whitespace.

    Every match of the module-level `ARTICLE_REGEX` is deleted, then the
    result is split on whitespace and re-joined with single spaces.

    Args:
        text (str): The text to process.

    Returns:
        (str): The text without articles, with words separated by
            single spaces and no leading or trailing whitespace.
    """
    without_articles = re.sub(ARTICLE_REGEX, "", text)
    # Split and re-join so gaps left by removed matches collapse to one space.
    remaining_words = without_articles.split()
    return " ".join(remaining_words)

remove_numerical_suffix(property_key)

Remove the numerical suffix from a property key.

Parameters:

Name Type Description Default
property_key str

The property key to process.

required

Returns:

Type Description
str

The property key with the suffix removed.

Source code in synalinks/src/utils/nlp_utils.py
def remove_numerical_suffix(property_key):
    """
    Strip the numerical suffix from a property key.

    Every match of the module-level `SUFFIX_PATTERN` in the key is
    replaced with the empty string.

    Args:
        property_key (str): The property key to process.

    Returns:
        (str): The property key with the suffix removed.
    """
    stripped_key = re.sub(SUFFIX_PATTERN, "", property_key)
    return stripped_key

remove_punctuation(text)

Remove punctuation from the text.

Parameters:

Name Type Description Default
text str

The text to process.

required

Returns:

Type Description
str

The text with punctuation removed.

Source code in synalinks/src/utils/nlp_utils.py
def remove_punctuation(text):
    """
    Strip punctuation characters from the text.

    The text is run through the module-level `PUNCTUATION_TRANSLATOR`
    translation table.

    Args:
        text (str): The text to process.

    Returns:
        (str): The translated text, with punctuation removed.
    """
    cleaned = text.translate(PUNCTUATION_TRANSLATOR)
    return cleaned

to_plural(word)

Convert a singular word to its plural form.

Parameters:

Name Type Description Default
word str

The singular word to convert.

required

Returns:

Type Description
str

The plural form of the word.

Source code in synalinks/src/utils/nlp_utils.py
def to_plural(word):
    """
    Return the plural form of a singular word.

    Words listed in `IRREGULAR_PLURALS` use the form stored there. Every
    other word goes through the suffix rules, tried in this order:
    a `Y_ENDING` match swaps the last character for "ies", an `S_ENDING`
    or `SH_CH_ENDING` match appends "es", and anything else gets "s".

    Args:
        word (str): The singular word to convert.

    Returns:
        (str): The plural form of the word.
    """
    # Irregular nouns bypass the suffix rules entirely.
    if word in IRREGULAR_PLURALS:
        return IRREGULAR_PLURALS.get(word)

    if Y_ENDING.search(word):
        return word[:-1] + "ies"

    needs_es = S_ENDING.search(word) or SH_CH_ENDING.search(word)
    ending = "es" if needs_es else "s"
    return word + ending

to_plural_property(property_key)

Convert the last word of a property key to its plural form.

Parameters:

Name Type Description Default
property_key str

The property key to convert.

required

Returns:

Type Description
str

The property key with the last word in plural form.

Source code in synalinks/src/utils/nlp_utils.py
def to_plural_property(property_key):
    """
    Pluralize the last word of a property key.

    The segment after the final underscore (or the whole key when there
    is no underscore) is assumed to be the noun and is passed through
    `to_plural`; everything before it is kept unchanged.

    Args:
        property_key (str): The property key to convert.

    Returns:
        (str): The property key with its last word in plural form.
    """
    # With no underscore, `rpartition` returns ("", "", key), so the whole
    # key is treated as the noun.
    prefix, separator, noun = property_key.rpartition("_")
    return prefix + separator + to_plural(noun)

to_plural_without_numerical_suffix(property_key)

Convert a property key to its list (plural) form by removing the numerical suffix and converting to plural.

Parameters:

Name Type Description Default
property_key str

The property key to convert.

required

Returns:

Type Description
str

The list (plural) form of the property key.

Source code in synalinks/src/utils/nlp_utils.py
def to_plural_without_numerical_suffix(property_key):
    """
    Produce the list (plural) form of a property key.

    The numerical suffix is removed first, then the last word of the
    remaining key is pluralized.

    Args:
        property_key (str): The property key to convert.

    Returns:
        (str): The list (plural) form of the property key.
    """
    return to_plural_property(remove_numerical_suffix(property_key))

to_singular(word)

Convert a plural word to its singular form.

Parameters:

Name Type Description Default
word str

The plural word to convert.

required

Returns:

Type Description
str

The singular form of the word.

Source code in synalinks/src/utils/nlp_utils.py
def to_singular(word):
    """
    Return the singular form of a plural word.

    Words listed in `IRREGULAR_SINGULARS` use the form stored there. Every
    other word goes through the suffix rules, tried in this order:

    1. An `IES_ENDING` match drops the last three characters and adds "y".
    2. An `ES_ENDING` match drops the last two characters when the
       remaining stem matches `S_ENDING` or `SH_CH_ENDING`, and only the
       last character otherwise.
    3. A word ending in "s" drops that final character.
    4. Any other word is returned unchanged.

    Args:
        word (str): The plural word to convert.

    Returns:
        (str): The singular form of the word.
    """
    # Irregular nouns bypass the suffix rules entirely.
    if word in IRREGULAR_SINGULARS:
        return IRREGULAR_SINGULARS.get(word)

    if IES_ENDING.search(word):
        return word[:-3] + "y"

    if ES_ENDING.search(word):
        stem = word[:-2]
        # Stems that take "es" as their plural lose both letters; others
        # only had an "s" appended after a final "e".
        stem_takes_es = S_ENDING.search(stem) or SH_CH_ENDING.search(stem)
        return stem if stem_takes_es else word[:-1]

    # NOTE(review): this also trims singular words that merely end in "s"
    # (e.g. "class") unless they are listed in IRREGULAR_SINGULARS --
    # confirm that is intended.
    if word.endswith("s"):
        return word[:-1]

    return word

to_singular_property(property_key)

Convert the last word of a property key to its singular form.

Parameters:

Name Type Description Default
property_key str

The property key to convert.

required

Returns:

Type Description
str

The property key with the last word in singular form.

Source code in synalinks/src/utils/nlp_utils.py
def to_singular_property(property_key):
    """
    Singularize the last word of a property key.

    The segment after the final underscore (or the whole key when there
    is no underscore) is assumed to be the noun and is passed through
    `to_singular`; everything before it is kept unchanged.

    Args:
        property_key (str): The property key to convert.

    Returns:
        (str): The property key with its last word in singular form.
    """
    # With no underscore, `rpartition` returns ("", "", key), so the whole
    # key is treated as the noun.
    prefix, separator, noun = property_key.rpartition("_")
    return prefix + separator + to_singular(noun)

to_singular_without_numerical_suffix(property_key)

Convert a property key to its base (singular) form by removing the numerical suffix and converting to singular.

Parameters:

Name Type Description Default
property_key str

The property key to convert.

required

Returns:

Type Description
str

The base (singular) form of the property key.

Source code in synalinks/src/utils/nlp_utils.py
def to_singular_without_numerical_suffix(property_key):
    """
    Produce the base (singular) form of a property key.

    The numerical suffix is removed first, then the last word of the
    remaining key is singularized.

    Args:
        property_key (str): The property key to convert.

    Returns:
        (str): The base (singular) form of the property key.
    """
    return to_singular_property(remove_numerical_suffix(property_key))