File Manager

Viewing File: /home/ubuntu/.local/lib/python3.10/site-packages/distance/_iterators.py

from ._pyimports import levenshtein, fast_comp

def ilevenshtein(seq1, seqs, max_dist=-1):
	"""Lazily compare `seq1` against each sequence of `seqs` using `levenshtein`.

		`seq1`: the reference sequence
		`seqs`: an iterable of sequences (can be a generator); it is consumed
		one item at a time, as results are requested
		`max_dist`: forwarded unchanged to `levenshtein`. If provided and > 0,
		only the sequences whose distance from the reference sequence is lower
		than or equal to this value are returned.

	Yields pairs (distance, sequence), in the order the sequences appear in
	`seqs`. A sequence for which `levenshtein` returns -1 is skipped.

	In the C implementation, the sequence objects in `seqs` are expected to be
	of the same kind as the reference sequence; the same holds true for
	`ifast_comp`.
	"""
	for candidate in seqs:
		distance = levenshtein(seq1, candidate, max_dist=max_dist)
		# -1 is the "no result" marker returned by `levenshtein`: drop the item.
		if distance == -1:
			continue
		yield distance, candidate


def ifast_comp(seq1, seqs, transpositions=False):
	"""Lazily filter `seqs`, keeping the sequences whose distance from `seq1`
	is lower than or equal to 2. Sequences that are further away from the
	reference sequence are dropped.

		`seq1`: the reference sequence.
		`seqs`: an iterable of sequences (can be a generator); it is consumed
		one item at a time, as results are requested
		`transpositions`: forwarded to `fast_comp`, where it has the same
		meaning.

	Yields pairs (distance, sequence), in the order the sequences appear in
	`seqs`. A sequence for which `fast_comp` returns -1 is skipped.

	Results are not sorted; call `sorted()` on the iterator to get them in a
	meaningful order:

		>>> g = ifast_comp("foo", ["fo", "bar", "foob", "foo", "foobaz"])
		>>> sorted(g)
		[(0, 'foo'), (1, 'fo'), (1, 'foob')]
	"""
	for candidate in seqs:
		distance = fast_comp(seq1, candidate, transpositions)
		# -1 is the "no result" marker returned by `fast_comp`: drop the item.
		if distance == -1:
			continue
		yield distance, candidate

Back to Directory File Manager