# -*- coding: utf-8 -*-
from array import array
def lcsubstrings(seq1, seq2, positions=False):
"""Find the longest common substring(s) in the sequences `seq1` and `seq2`.
If positions evaluates to `True` only their positions will be returned,
together with their length, in a tuple:
(length, [(start pos in seq1, start pos in seq2)..])
Otherwise, the substrings themselves will be returned, in a set.
Example:
>>> lcsubstrings("sedentar", "dentist")
{'dent'}
>>> lcsubstrings("sedentar", "dentist", positions=True)
(4, [(2, 0)])
"""
L1, L2 = len(seq1), len(seq2)
ms = []
mlen = last = 0
if L1 < L2:
seq1, seq2 = seq2, seq1
L1, L2 = L2, L1
column = array('L', range(L2))
for i in range(L1):
for j in range(L2):
old = column[j]
if seq1[i] == seq2[j]:
if i == 0 or j == 0:
column[j] = 1
else:
column[j] = last + 1
if column[j] > mlen:
mlen = column[j]
ms = [(i, j)]
elif column[j] == mlen:
ms.append((i, j))
else:
column[j] = 0
last = old
if positions:
return (mlen, tuple((i - mlen + 1, j - mlen + 1) for i, j in ms if ms))
return set(seq1[i - mlen + 1:i + 1] for i, _ in ms if ms)