# SPDX-License-Identifier: MIT
# Copyright (C) 2022 Max Bachmann
from __future__ import annotations
from math import ceil
from typing import Any, Callable, Hashable, Sequence
from rapidfuzz._common_py import conv_sequences
from rapidfuzz._utils import ScorerFlag, add_scorer_attrs, is_none
from rapidfuzz.distance import ScoreAlignment
from rapidfuzz.distance.Indel_py import (
_block_normalized_similarity as indel_block_normalized_similarity,
)
from rapidfuzz.distance.Indel_py import distance as indel_distance
from rapidfuzz.distance.Indel_py import (
normalized_similarity as indel_normalized_similarity,
)
def get_scorer_flags_fuzz(**_kwargs: Any) -> dict[str, Any]:
    """Return the scorer metadata shared by every fuzz ratio.

    All fuzz scorers produce a float in [0, 100], where 100 is a perfect
    match, and are symmetric in their two arguments.
    """
    flags = ScorerFlag.RESULT_F64 | ScorerFlag.SYMMETRIC
    return {"optimal_score": 100, "worst_score": 0, "flags": flags}
fuzz_attribute: dict[str, Callable[..., dict[str, Any]]] = {"get_scorer_flags": get_scorer_flags_fuzz}
def _norm_distance(dist: int, lensum: int, score_cutoff: float) -> float:
score = (100 - 100 * dist / lensum) if lensum else 100
return score if score >= score_cutoff else 0
def _split_sequence(seq: Sequence[Hashable]) -> list[Sequence[Hashable]]:
if isinstance(seq, (str, bytes)):
return seq.split()
splitted_seq = [[]]
for x in seq:
ch = x if isinstance(x, str) else chr(x)
if ch.isspace():
splitted_seq.append([])
else:
splitted_seq[-1].append(x)
return [tuple(x) for x in splitted_seq if x]
def _join_splitted_sequence(seq_list: list[Sequence[Hashable]]):
if not seq_list:
return ""
if isinstance(next(iter(seq_list)), str):
return " ".join(seq_list)
if isinstance(next(iter(seq_list)), bytes):
return b" ".join(seq_list)
joined = []
for seq in seq_list:
joined += seq
joined += [ord(" ")]
return joined[:-1]
def ratio(
    s1: Sequence[Hashable] | None,
    s2: Sequence[Hashable] | None,
    *,
    processor: Callable[..., Sequence[Hashable]] | None = None,
    score_cutoff: float | None = None,
) -> float:
    """
    Calculates the normalized Indel similarity.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable used to preprocess both strings before comparing
        them. Default is None, which disables preprocessing.
    score_cutoff : float, optional
        Optional score threshold as a float between 0 and 100. When the
        ratio is below ``score_cutoff``, 0 is returned instead. Default is
        None, which disables this behaviour.

    Returns
    -------
    similarity : float
        similarity between s1 and s2 as a float between 0 and 100

    See Also
    --------
    rapidfuzz.distance.Indel.normalized_distance : Normalized Indel distance

    Notes
    -----
    .. image:: img/ratio.svg

    Examples
    --------
    >>> fuzz.ratio("this is a test", "this is a test!")
    96.55171966552734
    """
    if is_none(s1) or is_none(s2):
        return 0
    # the Indel helper works on a normalized 0.0-1.0 scale
    cutoff = score_cutoff / 100 if score_cutoff is not None else None
    return 100 * indel_normalized_similarity(s1, s2, processor=processor, score_cutoff=cutoff)
def _partial_ratio_short_needle(s1: Sequence[Hashable], s2: Sequence[Hashable], score_cutoff: float) -> ScoreAlignment:
    """
    implementation of partial_ratio for needles <= 64. assumes s1 is already the
    shorter string

    ``score_cutoff`` is expected on the normalized 0.0-1.0 scale here; the
    score stored in the returned ``ScoreAlignment`` is scaled back to 0-100.
    """
    # characters occurring in the needle: a window whose boundary character is
    # not in this set was already covered by a previous window and is skipped
    s1_char_set = set(s1)
    len1 = len(s1)
    len2 = len(s2)
    res = ScoreAlignment(0, 0, len1, 0, len1)
    # bit-parallel pattern map for the Indel scorer: each needle character maps
    # to a bitmask of the positions where it occurs in s1
    block: dict[Hashable, int] = {}
    block_get = block.get
    x = 1
    for ch1 in s1:
        block[ch1] = block_get(ch1, 0) | x
        x <<= 1
    # 1) windows hanging off the left edge: prefixes of s2 shorter than s1
    for i in range(1, len1):
        substr_last = s2[i - 1]
        if substr_last not in s1_char_set:
            continue
        # todo cache map
        ls_ratio = indel_block_normalized_similarity(block, s1, s2[:i], score_cutoff=score_cutoff)
        if ls_ratio > res.score:
            # raise the cutoff so later windows must beat the new best score
            res.score = score_cutoff = ls_ratio
            res.dest_start = 0
            res.dest_end = i
            if res.score == 1:
                res.score = 100
                return res
    # 2) full-length windows of s2
    for i in range(len2 - len1):
        substr_last = s2[i + len1 - 1]
        if substr_last not in s1_char_set:
            continue
        # todo cache map
        ls_ratio = indel_block_normalized_similarity(block, s1, s2[i : i + len1], score_cutoff=score_cutoff)
        if ls_ratio > res.score:
            res.score = score_cutoff = ls_ratio
            res.dest_start = i
            res.dest_end = i + len1
            if res.score == 1:
                res.score = 100
                return res
    # 3) windows hanging off the right edge: suffixes of s2 shorter than s1
    for i in range(len2 - len1, len2):
        substr_first = s2[i]
        if substr_first not in s1_char_set:
            continue
        # todo cache map
        ls_ratio = indel_block_normalized_similarity(block, s1, s2[i:], score_cutoff=score_cutoff)
        if ls_ratio > res.score:
            res.score = score_cutoff = ls_ratio
            res.dest_start = i
            res.dest_end = len2
            if res.score == 1:
                res.score = 100
                return res
    # scale the normalized best score back to the 0-100 range
    res.score *= 100
    return res
def partial_ratio(
    s1: Sequence[Hashable] | None,
    s2: Sequence[Hashable] | None,
    *,
    processor: Callable[..., Sequence[Hashable]] | None = None,
    score_cutoff: float | None = None,
) -> float:
    """
    Searches for the optimal alignment of the shorter string inside the
    longer string and returns the fuzz.ratio of that alignment.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable used to preprocess both strings before comparing
        them. Default is None, which disables preprocessing.
    score_cutoff : float, optional
        Optional score threshold as a float between 0 and 100. When the
        ratio is below ``score_cutoff``, 0 is returned instead. Default is
        None, which disables this behaviour.

    Returns
    -------
    similarity : float
        similarity between s1 and s2 as a float between 0 and 100

    Notes
    -----
    Depending on the length of the needle (shorter string) different
    implementations are used to improve the performance.

    short needle (length <= 64):
        The fuzz.ratio is evaluated for every alignment that could be
        optimal, so the optimal alignment is guaranteed to be found. For
        short needles fuzz.ratio runs in ``O(N)``, giving a worst case of
        ``O(NM)``.

    .. image:: img/partial_ratio_short_needle.svg

    long needle (length > 64):
        An implementation similar to FuzzyWuzzy is used, which only
        considers alignments starting at one of the longest common
        substrings. Worst case ``O(N[N/64]M)``, but most alignments can
        usually be skipped. This is much faster, but only finds one of the
        best alignments rather than necessarily the optimal one.

    .. image:: img/partial_ratio_long_needle.svg

    Examples
    --------
    >>> fuzz.partial_ratio("this is a test", "this is a test!")
    100.0
    """
    result = partial_ratio_alignment(s1, s2, processor=processor, score_cutoff=score_cutoff)
    return 0 if result is None else result.score
def partial_ratio_alignment(
    s1: str | bytes | None,
    s2: str | bytes | None,
    *,
    processor: Callable[..., str | bytes] | None = None,
    score_cutoff: float | None = None,
) -> ScoreAlignment | None:
    """
    Searches for the optimal alignment of the shorter string in the
    longer string and returns the fuzz.ratio and the corresponding
    alignment.

    Parameters
    ----------
    s1 : str | bytes
        First string to compare.
    s2 : str | bytes
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 100.
        For ratio < score_cutoff None is returned instead. Default is 0,
        which deactivates this behaviour.

    Returns
    -------
    alignment : ScoreAlignment, optional
        alignment between s1 and s2 with the score as a float between 0 and 100

    Examples
    --------
    >>> s1 = "a certain string"
    >>> s2 = "cetain"
    >>> res = fuzz.partial_ratio_alignment(s1, s2)
    >>> res
    ScoreAlignment(score=83.33333333333334, src_start=2, src_end=8, dest_start=0, dest_end=6)

    Using the alignment information it is possible to calculate the same fuzz.ratio

    >>> fuzz.ratio(s1[res.src_start:res.src_end], s2[res.dest_start:res.dest_end])
    83.33333333333334
    """
    if is_none(s1) or is_none(s2):
        return None
    if processor is not None:
        s1 = processor(s1)
        s2 = processor(s2)
    if score_cutoff is None:
        score_cutoff = 0
    # two empty sequences count as a perfect match
    if not s1 and not s2:
        return ScoreAlignment(100.0, 0, 0, 0, 0)
    s1, s2 = conv_sequences(s1, s2)
    # the helper expects its first argument to be the shorter string
    if len(s1) <= len(s2):
        shorter = s1
        longer = s2
    else:
        shorter = s2
        longer = s1
    # the helper works with a normalized cutoff in the range 0.0-1.0
    res = _partial_ratio_short_needle(shorter, longer, score_cutoff / 100)
    # for equal-length strings the alignment is not symmetric, so unless a
    # perfect match was already found the reverse direction is tried as well
    if res.score != 100 and len(s1) == len(s2):
        score_cutoff = max(score_cutoff, res.score)
        res2 = _partial_ratio_short_needle(longer, shorter, score_cutoff / 100)
        if res2.score > res.score:
            # swap src/dest so the alignment refers to (s1, s2) order again
            res = ScoreAlignment(res2.score, res2.dest_start, res2.dest_end, res2.src_start, res2.src_end)
    if res.score < score_cutoff:
        return None
    if len(s1) <= len(s2):
        return res
    # s2 was the needle: swap src/dest back into (s1, s2) order
    return ScoreAlignment(res.score, res.dest_start, res.dest_end, res.src_start, res.src_end)
def token_sort_ratio(
    s1: str | None,
    s2: str | None,
    *,
    processor: Callable[..., str] | None = None,
    score_cutoff: float | None = None,
) -> float:
    """
    Sorts the words in both strings and calculates the fuzz.ratio between them.

    Parameters
    ----------
    s1 : str
        First string to compare.
    s2 : str
        Second string to compare.
    processor: callable, optional
        Optional callable used to preprocess both strings before comparing
        them. Default is None, which disables preprocessing.
    score_cutoff : float, optional
        Optional score threshold as a float between 0 and 100. When the
        ratio is below ``score_cutoff``, 0 is returned instead. Default is
        None, which disables this behaviour.

    Returns
    -------
    similarity : float
        similarity between s1 and s2 as a float between 0 and 100

    Notes
    -----
    .. image:: img/token_sort_ratio.svg

    Examples
    --------
    >>> fuzz.token_sort_ratio("fuzzy wuzzy was a bear", "wuzzy fuzzy was a bear")
    100.0
    """
    if is_none(s1) or is_none(s2):
        return 0
    if processor is not None:
        s1 = processor(s1)
        s2 = processor(s2)
    s1, s2 = conv_sequences(s1, s2)

    def sorted_words(seq):
        # split into words, sort them, and join them back with spaces
        return _join_splitted_sequence(sorted(_split_sequence(seq)))

    return ratio(sorted_words(s1), sorted_words(s2), score_cutoff=score_cutoff)
def token_set_ratio(
    s1: str | None,
    s2: str | None,
    *,
    processor: Callable[..., str] | None = None,
    score_cutoff: float | None = None,
) -> float:
    """
    Compares the words in the strings based on unique and common words between them
    using fuzz.ratio

    Parameters
    ----------
    s1 : str
        First string to compare.
    s2 : str
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 100.
        For ratio < score_cutoff 0 is returned instead. Default is 0,
        which deactivates this behaviour.

    Returns
    -------
    similarity : float
        similarity between s1 and s2 as a float between 0 and 100

    Notes
    -----
    .. image:: img/token_set_ratio.svg

    Examples
    --------
    >>> fuzz.token_sort_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear")
    83.8709716796875
    >>> fuzz.token_set_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear")
    100.0
    """
    if is_none(s1) or is_none(s2):
        return 0
    if processor is not None:
        s1 = processor(s1)
        s2 = processor(s2)
    if score_cutoff is None:
        score_cutoff = 0
    s1, s2 = conv_sequences(s1, s2)
    tokens_a = set(_split_sequence(s1))
    tokens_b = set(_split_sequence(s2))
    # in FuzzyWuzzy this returns 0. For sake of compatibility return 0 here as well
    # see https://github.com/rapidfuzz/RapidFuzz/issues/110
    if not tokens_a or not tokens_b:
        return 0
    intersect = tokens_a.intersection(tokens_b)
    diff_ab = tokens_a.difference(tokens_b)
    diff_ba = tokens_b.difference(tokens_a)
    # one sentence is part of the other one
    if intersect and (not diff_ab or not diff_ba):
        return 100
    diff_ab_joined = _join_splitted_sequence(sorted(diff_ab))
    diff_ba_joined = _join_splitted_sequence(sorted(diff_ba))
    ab_len = len(diff_ab_joined)
    ba_len = len(diff_ba_joined)
    # todo is length sum without joining faster?
    sect_len = len(_join_splitted_sequence(intersect))
    # string length sect+ab <-> sect and sect+ba <-> sect
    # (sect_len != 0) accounts for the space joining sect with the diff words
    sect_ab_len = sect_len + (sect_len != 0) + ab_len
    sect_ba_len = sect_len + (sect_len != 0) + ba_len
    result = 0.0
    # largest distance between the two diff strings that can still reach
    # score_cutoff; lets indel_distance exit early
    cutoff_distance = ceil((sect_ab_len + sect_ba_len) * (1 - score_cutoff / 100))
    dist = indel_distance(diff_ab_joined, diff_ba_joined, score_cutoff=cutoff_distance)
    if dist <= cutoff_distance:
        result = _norm_distance(dist, sect_ab_len + sect_ba_len, score_cutoff)
    # exit early since the other ratios are 0
    if not sect_len:
        return result
    # levenshtein distance sect+ab <-> sect and sect+ba <-> sect
    # since only sect is similar in them the distance can be calculated based on
    # the length difference
    sect_ab_dist = (sect_len != 0) + ab_len
    sect_ab_ratio = _norm_distance(sect_ab_dist, sect_len + sect_ab_len, score_cutoff)
    sect_ba_dist = (sect_len != 0) + ba_len
    sect_ba_ratio = _norm_distance(sect_ba_dist, sect_len + sect_ba_len, score_cutoff)
    return max(result, sect_ab_ratio, sect_ba_ratio)
def token_ratio(
    s1: str | None,
    s2: str | None,
    *,
    processor: Callable[..., str] | None = None,
    score_cutoff: float | None = None,
) -> float:
    """
    Helper method returning the maximum of fuzz.token_set_ratio and
    fuzz.token_sort_ratio (faster than executing the two functions manually).

    Parameters
    ----------
    s1 : str
        First string to compare.
    s2 : str
        Second string to compare.
    processor: callable, optional
        Optional callable used to preprocess both strings before comparing
        them. Default is None, which disables preprocessing.
    score_cutoff : float, optional
        Optional score threshold as a float between 0 and 100. When the
        ratio is below ``score_cutoff``, 0 is returned instead. Default is
        None, which disables this behaviour.

    Returns
    -------
    similarity : float
        similarity between s1 and s2 as a float between 0 and 100

    Notes
    -----
    .. image:: img/token_ratio.svg
    """
    if is_none(s1) or is_none(s2):
        return 0
    if processor is not None:
        s1 = processor(s1)
        s2 = processor(s2)
    # todo write combined implementation
    set_score = token_set_ratio(s1, s2, processor=None, score_cutoff=score_cutoff)
    sort_score = token_sort_ratio(s1, s2, processor=None, score_cutoff=score_cutoff)
    return max(set_score, sort_score)
def partial_token_sort_ratio(
    s1: str | None,
    s2: str | None,
    *,
    processor: Callable[..., str] | None = None,
    score_cutoff: float | None = None,
) -> float:
    """
    Sorts the words in both strings and calculates the fuzz.partial_ratio
    between them.

    Parameters
    ----------
    s1 : str
        First string to compare.
    s2 : str
        Second string to compare.
    processor: callable, optional
        Optional callable used to preprocess both strings before comparing
        them. Default is None, which disables preprocessing.
    score_cutoff : float, optional
        Optional score threshold as a float between 0 and 100. When the
        ratio is below ``score_cutoff``, 0 is returned instead. Default is
        None, which disables this behaviour.

    Returns
    -------
    similarity : float
        similarity between s1 and s2 as a float between 0 and 100

    Notes
    -----
    .. image:: img/partial_token_sort_ratio.svg
    """
    if is_none(s1) or is_none(s2):
        return 0
    if processor is not None:
        s1 = processor(s1)
        s2 = processor(s2)
    s1, s2 = conv_sequences(s1, s2)

    def sorted_words(seq):
        # split into words, sort them, and join them back with spaces
        return _join_splitted_sequence(sorted(_split_sequence(seq)))

    return partial_ratio(sorted_words(s1), sorted_words(s2), score_cutoff=score_cutoff)
def partial_token_set_ratio(
    s1: str | None,
    s2: str | None,
    *,
    processor: Callable[..., str] | None = None,
    score_cutoff: float | None = None,
) -> float:
    """
    Compares the words in the strings based on unique and common words
    between them using fuzz.partial_ratio.

    Parameters
    ----------
    s1 : str
        First string to compare.
    s2 : str
        Second string to compare.
    processor: callable, optional
        Optional callable used to preprocess both strings before comparing
        them. Default is None, which disables preprocessing.
    score_cutoff : float, optional
        Optional score threshold as a float between 0 and 100. When the
        ratio is below ``score_cutoff``, 0 is returned instead. Default is
        None, which disables this behaviour.

    Returns
    -------
    similarity : float
        similarity between s1 and s2 as a float between 0 and 100

    Notes
    -----
    .. image:: img/partial_token_set_ratio.svg
    """
    if is_none(s1) or is_none(s2):
        return 0
    if processor is not None:
        s1 = processor(s1)
        s2 = processor(s2)
    s1, s2 = conv_sequences(s1, s2)
    words_a = set(_split_sequence(s1))
    words_b = set(_split_sequence(s2))
    # in FuzzyWuzzy this returns 0. For sake of compatibility return 0 here as well
    # see https://github.com/rapidfuzz/RapidFuzz/issues/110
    if not words_a or not words_b:
        return 0
    # exit early when there is a common word in both sequences
    if words_a & words_b:
        return 100
    unique_a = _join_splitted_sequence(sorted(words_a - words_b))
    unique_b = _join_splitted_sequence(sorted(words_b - words_a))
    return partial_ratio(unique_a, unique_b, score_cutoff=score_cutoff)
def partial_token_ratio(
    s1: str | None,
    s2: str | None,
    *,
    processor: Callable[..., str] | None = None,
    score_cutoff: float | None = None,
) -> float:
    """
    Helper method returning the maximum of fuzz.partial_token_set_ratio and
    fuzz.partial_token_sort_ratio (faster than executing the two functions
    manually).

    Parameters
    ----------
    s1 : str
        First string to compare.
    s2 : str
        Second string to compare.
    processor: callable, optional
        Optional callable used to preprocess both strings before comparing
        them. Default is None, which disables preprocessing.
    score_cutoff : float, optional
        Optional score threshold as a float between 0 and 100. When the
        ratio is below ``score_cutoff``, 0 is returned instead. Default is
        None, which disables this behaviour.

    Returns
    -------
    similarity : float
        similarity between s1 and s2 as a float between 0 and 100

    Notes
    -----
    .. image:: img/partial_token_ratio.svg
    """
    if is_none(s1) or is_none(s2):
        return 0
    if processor is not None:
        s1 = processor(s1)
        s2 = processor(s2)
    if score_cutoff is None:
        score_cutoff = 0
    s1, s2 = conv_sequences(s1, s2)
    words_a = _split_sequence(s1)
    words_b = _split_sequence(s2)
    unique_a = set(words_a)
    unique_b = set(words_b)
    # a shared word guarantees a perfect partial alignment of the set variant
    if unique_a & unique_b:
        return 100
    diff_a = unique_a - unique_b
    diff_b = unique_b - unique_a
    best = partial_ratio(
        _join_splitted_sequence(sorted(words_a)),
        _join_splitted_sequence(sorted(words_b)),
        score_cutoff=score_cutoff,
    )
    # without duplicated words the second comparison would be identical
    if len(words_a) == len(diff_a) and len(words_b) == len(diff_b):
        return best
    second = partial_ratio(
        _join_splitted_sequence(sorted(diff_a)),
        _join_splitted_sequence(sorted(diff_b)),
        score_cutoff=max(score_cutoff, best),
    )
    return max(best, second)
def WRatio(
    s1: str | None,
    s2: str | None,
    *,
    processor: Callable[..., str] | None = None,
    score_cutoff: float | None = None,
) -> float:
    """
    Calculates a weighted ratio based on the other ratio algorithms

    Parameters
    ----------
    s1 : str
        First string to compare.
    s2 : str
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 100.
        For ratio < score_cutoff 0 is returned instead. Default is 0,
        which deactivates this behaviour.

    Returns
    -------
    similarity : float
        similarity between s1 and s2 as a float between 0 and 100

    Notes
    -----
    .. image:: img/WRatio.svg
    """
    if is_none(s1) or is_none(s2):
        return 0
    # weight applied to the token-based / partial ratios below
    UNBASE_SCALE = 0.95
    if processor is not None:
        s1 = processor(s1)
        s2 = processor(s2)
    # in FuzzyWuzzy this returns 0. For sake of compatibility return 0 here as well
    # see https://github.com/rapidfuzz/RapidFuzz/issues/110
    if not s1 or not s2:
        return 0
    if score_cutoff is None:
        score_cutoff = 0
    len1 = len(s1)
    len2 = len(s2)
    # ratio of the longer to the shorter length (always >= 1)
    len_ratio = len1 / len2 if len1 > len2 else len2 / len1
    end_ratio = ratio(s1, s2, score_cutoff=score_cutoff)
    # similar lengths: combine the plain ratio with the token ratios
    if len_ratio < 1.5:
        # a weighted score can only win if it beats the current best / cutoff
        # even after scaling, so the cutoff passed down can be raised
        score_cutoff = max(score_cutoff, end_ratio) / UNBASE_SCALE
        return max(
            end_ratio,
            token_ratio(s1, s2, score_cutoff=score_cutoff, processor=None) * UNBASE_SCALE,
        )
    # very different lengths: include partial ratios, weighted down harder the
    # bigger the length difference is
    PARTIAL_SCALE = 0.9 if len_ratio < 8.0 else 0.6
    score_cutoff = max(score_cutoff, end_ratio) / PARTIAL_SCALE
    end_ratio = max(end_ratio, partial_ratio(s1, s2, score_cutoff=score_cutoff) * PARTIAL_SCALE)
    score_cutoff = max(score_cutoff, end_ratio) / UNBASE_SCALE
    return max(
        end_ratio,
        partial_token_ratio(s1, s2, score_cutoff=score_cutoff, processor=None) * UNBASE_SCALE * PARTIAL_SCALE,
    )
def QRatio(
    s1: Sequence[Hashable] | None,
    s2: Sequence[Hashable] | None,
    *,
    processor: Callable[..., Sequence[Hashable]] | None = None,
    score_cutoff: float | None = None,
) -> float:
    """
    Calculates a quick ratio between two strings using fuzz.ratio.

    Since v3.0 this behaves similar to fuzz.ratio with the exception that
    this returns 0 when comparing two empty strings.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable used to preprocess both strings before comparing
        them. Default is None, which disables preprocessing.
    score_cutoff : float, optional
        Optional score threshold as a float between 0 and 100. When the
        ratio is below ``score_cutoff``, 0 is returned instead. Default is
        None, which disables this behaviour.

    Returns
    -------
    similarity : float
        similarity between s1 and s2 as a float between 0 and 100

    Examples
    --------
    >>> fuzz.QRatio("this is a test", "this is a test!")
    96.55171966552734
    """
    if is_none(s1) or is_none(s2):
        return 0
    if processor is not None:
        s1, s2 = processor(s1), processor(s2)
    # in FuzzyWuzzy this returns 0. For sake of compatibility return 0 here as well
    # see https://github.com/rapidfuzz/RapidFuzz/issues/110
    if not s1 or not s2:
        return 0
    return ratio(s1, s2, score_cutoff=score_cutoff)
# attach the shared scorer metadata (get_scorer_flags) to every public scorer
add_scorer_attrs(ratio, fuzz_attribute)
add_scorer_attrs(partial_ratio, fuzz_attribute)
add_scorer_attrs(token_sort_ratio, fuzz_attribute)
add_scorer_attrs(token_set_ratio, fuzz_attribute)
add_scorer_attrs(token_ratio, fuzz_attribute)
add_scorer_attrs(partial_token_sort_ratio, fuzz_attribute)
add_scorer_attrs(partial_token_set_ratio, fuzz_attribute)
add_scorer_attrs(partial_token_ratio, fuzz_attribute)
add_scorer_attrs(WRatio, fuzz_attribute)
add_scorer_attrs(QRatio, fuzz_attribute)