Viewing File: /home/ubuntu/.local/lib/python3.10/site-packages/pykakasi/scripts.py
# -*- coding: utf-8 -*-
# scripts.py
#
# Copyright 2011-2019 Hiroshi Miura <miurahr@linux.com>
import functools
import pickle
from typing import Dict
from .properties import Ch, Configurations, Convert_Tables
class IConv:
_MAXLEN: int = 32
def __init__(self):
self._hahconv = H2("a", method="Hepburn")
self._hakconv = H2("a", method="Kunrei")
self._hapconv = H2("a", method="Passport")
self._hkconv = H2("K")
self._khconv = K2("H")
self._saconv = Sym2("a")
@functools.lru_cache(maxsize=256)
def convert(self, otext: str, hira: str) -> Dict[str, str]:
kana = self._h2k(hira)
hira = self._k2h(hira) # make sure hiragana doesn't contain katakana
tmp = {
"orig": otext,
"hira": hira,
"kana": kana,
"hepburn": self._s2a(self._h2ah(hira)),
"kunrei": self._s2a(self._h2ak(hira)),
"passport": self._s2a(self._h2ap(hira)),
}
return tmp
def _s2a(self, text: str) -> str:
result = "" # type: str
i = 0
length = len(text)
while i < length:
w = min(i + self._MAXLEN, length) # type: int
(t, l1) = self._saconv.convert(text[i:w])
if l1 > 0:
result += t
i += l1
elif text[i] in Ch.long_symbols: # handle chōonpu sound marks
# use previous char as a transliteration for kana-dash
if len(result) > 0:
result += result[-1]
else:
result += "-"
i += 1
else:
result += text[i : i + 1]
i += 1
return result
def _k2h(self, text: str) -> str:
result = ""
i = 0
while i < len(text):
w = min(i + self._MAXLEN, len(text))
(t, l1) = self._khconv.convert(text[i:w])
if l1 > 0:
result += t
i += l1
else:
result += text[i : i + 1]
i += 1
return result
def _h2k(self, text: str) -> str:
result = ""
i = 0
while i < len(text):
w = min(i + self._MAXLEN, len(text))
(t, l1) = self._hkconv.convert(text[i:w])
if l1 > 0:
result += t
i += l1
else:
result += text[i : i + 1]
i += 1
return result
def _h2ak(self, text: str) -> str:
result = ""
i = 0
while i < len(text):
w = min(i + self._MAXLEN, len(text))
(t, l1) = self._hakconv.convert(text[i:w])
if l1 > 0:
result += t
i += l1
else:
result += text[i : i + 1]
i += 1
return result
def _h2ah(self, text: str) -> str:
result = ""
i = 0
while i < len(text):
w = min(i + self._MAXLEN, len(text))
(t, l1) = self._hahconv.convert(text[i:w])
if l1 > 0:
result += t
i += l1
else:
result += text[i : i + 1]
i += 1
return result
def _h2ap(self, text: str) -> str:
result = ""
i = 0
while i < len(text):
w = min(i + self._MAXLEN, len(text))
(t, l1) = self._hapconv.convert(text[i:w])
if l1 > 0:
result += t
i += l1
else:
result += text[i : i + 1]
i += 1
return result
class H2:
_kanadict = None
_diff = 0x30A1 - 0x3041 # KATAKANA LETTER A - HIRAGANA A
_ediff = 0x1B164 - 0x1B150
def __init__(self, mode, method="Hepburn"):
if mode == "a":
if method == "Passport":
self._kanadict = Jisyo(Configurations.jisyo_passport_hira)
elif method == "Kunrei":
self._kanadict = Jisyo(Configurations.jisyo_kunrei_hira)
else:
self._kanadict = Jisyo(Configurations.jisyo_hepburn_hira)
self.convert = self.convert_a
elif mode == "K":
self.convert = self.convert_K
else:
self.convert = self.convert_noop
@classmethod
def isRegion(cls, char):
return 0x3040 < ord(char[0]) < 0x3097 or 0x1B150 <= ord(char[0]) <= 0x1B152
def convert_a(self, text):
Hstr = ""
max_len = -1
r = min(self._kanadict.maxkeylen(), len(text))
for x in range(1, r + 1):
if self._kanadict.haskey(text[:x]):
if max_len < x:
max_len = x
Hstr = self._kanadict.lookup(text[:x])
return (Hstr, max_len)
def convert_K(self, text):
Hstr = ""
max_len = 0
r = len(text)
for x in range(r):
if 0x3040 < ord(text[x]) < 0x3097:
Hstr = Hstr + chr(ord(text[x]) + self._diff)
max_len += 1
elif 0x1B150 <= ord(text[x]) <= 0x1B152:
Hstr = Hstr + chr(ord(text[x]) + self._ediff)
max_len += 1
else: # pragma: no cover
break
return (Hstr, max_len)
def convert_noop(self, text):
return (text[0], 1)
class K2:
_kanadict = None
_halfkanadict = None
_diff = 0x30A1 - 0x3041 # KATAKANA LETTER A - HIRAGANA A
_ediff = 0x1B164 - 0x1B150
def __init__(self, mode, method="Hepburn"):
self._halfkanadict = Jisyo(Configurations.jisyo_halfkana)
if mode == "a":
if method == "Passport":
self._kanadict = Jisyo(Configurations.jisyo_passport)
elif method == "Kunrei":
self._kanadict = Jisyo(Configurations.jisyo_kunrei)
else:
self._kanadict = Jisyo(Configurations.jisyo_hepburn)
self.convert = self.convert_a
elif mode == "H":
self.convert = self.convert_h
else:
self.convert = self.convert_noop
@classmethod
def isRegion(cls, char):
ch = ord(char[0])
return (
cls._is_katakana(ch)
or cls._is_half_width_kana(ch)
or 0x1B164 <= ch <= 0x1B167
)
@classmethod
def _is_katakana(cls, ch):
return 0x30A0 < ch < 0x30FD
@classmethod
def _is_half_width_kana(cls, ch):
return 0xFF65 < ch < 0xFF9F
def _convert_half_kana(self, text):
Hstr = ""
max_len = -1
for x in [2, 1]:
if self._halfkanadict.haskey(text[:x]):
max_len = x
Hstr = self._halfkanadict.lookup(text[:x])
break
return Hstr, max_len
def convert_a(self, text):
Hstr = ""
max_len = -1
r = min(self._kanadict.maxkeylen(), len(text))
for x in range(1, r + 1):
if self._kanadict.haskey(text[:x]):
if max_len < x:
max_len = x
Hstr = self._kanadict.lookup(text[:x])
return Hstr, max_len
def convert_h(self, text):
Hstr = ""
max_len = 0
r = len(text)
x = 0
while x < r:
if 0x1B164 <= ord(text[x]) < 0x1B167:
Hstr = Hstr + chr(ord(text[x]) - self._ediff)
max_len += 1
x += 1
elif ord(text[x]) == 0x1B167:
Hstr = Hstr + "\u3093"
max_len += 1
x += 1
elif 0x30A0 < ord(text[x]) < 0x30F7:
Hstr = Hstr + chr(ord(text[x]) - self._diff)
max_len += 1
x += 1
elif 0x30F7 <= ord(text[x]) < 0x30FD:
Hstr = Hstr + text[x]
max_len += 1
x += 1
elif self._is_half_width_kana(ord(text[x])):
kstr, length = self._convert_half_kana(text[x:])
if length > 0:
max_len += length
x += length
if ord(kstr) == 0x309B:
Hstr = Hstr + kstr
else:
Hstr = Hstr + chr(ord(kstr) - self._diff)
else:
max_len += 1
x += 1 # skip unknown character(issue #115)
else: # pragma: no cover
break
return (Hstr, max_len)
def convert_noop(self, text):
return text[0], 1
class Jisyo:
_dict = None
def __init__(self, dictname):
src = Configurations.dictpath(dictname)
with open(src, "rb") as d:
self._dict = pickle.load(d)
def haskey(self, key):
return key in self._dict
def lookup(self, key):
return self._dict[key]
def maxkeylen(self):
return self._dict["_max_key_len_"]
class Sym2:
def __init__(self, mode):
if mode == "a":
self.convert = self.convert_a
else:
self.convert = self.convert_noop
@classmethod
def isRegion(cls, char: str):
c = ord(char[0])
return (
(Ch.ideographic_space <= c <= Ch.postal_mark_face)
or (Ch.wavy_dash <= c <= Ch.ideographic_half_fill_space)
or (Ch.greece_Alpha <= c <= Ch.greece_Rho)
or (Ch.greece_Sigma <= c <= Ch.greece_Omega)
or (Ch.greece_alpha <= c <= Ch.greece_omega)
or (Ch.cyrillic_A <= c <= Ch.cyrillic_ya)
or (Ch.zenkaku_exc_mark <= c <= Ch.zenkaku_number_nine)
or (0xFF20 <= c <= 0xFF5E)
or c == 0x0451
or c == 0x0401
)
def _convert(self, text):
c = ord(text[0])
if Ch.ideographic_space <= c <= Ch.postal_mark_face:
return Convert_Tables.symbol_table_1[c - Ch.ideographic_space]
elif Ch.wavy_dash <= c <= Ch.ideographic_half_fill_space:
return Convert_Tables.symbol_table_2[c - Ch.wavy_dash]
elif Ch.greece_Alpha <= c <= Ch.greece_Omega:
return Convert_Tables.symbol_table_3[c - Ch.greece_Alpha]
elif Ch.greece_alpha <= c <= Ch.greece_omega:
return Convert_Tables.symbol_table_4[c - Ch.greece_alpha]
elif Ch.cyrillic_A <= c <= Ch.cyrillic_ya:
return Convert_Tables.cyrillic_table[text[0]]
elif c == Ch.cyrillic_E or c == Ch.cyrillic_e:
return Convert_Tables.cyrillic_table[text[0]]
elif Ch.zenkaku_exc_mark <= c <= Ch.zenkaku_slash_mark:
return Convert_Tables.symbol_table_5[c - Ch.zenkaku_exc_mark]
elif Ch.zenkaku_number_zero <= c <= Ch.zenkaku_number_nine:
return chr(c - Ch.zenkaku_number_zero + ord("0"))
elif 0xFF20 <= c <= 0xFF40:
return chr(0x0041 + c - 0xFF21) # u\ff21A => u\0041:@A..Z[\]^_`
elif 0xFF41 <= c < 0xFF5F:
return chr(0x0061 + c - 0xFF41) # u\ff41a => u\0061:a..z{|}
else:
return "" # pragma: no cover
def convert_a(self, text):
t = self._convert(text)
if t is not None and len(t) > 0:
return t, 1
else:
return "", 0
def convert_noop(self, text):
return text[0], 1
class A2:
def __init__(self, mode):
if mode == "E":
self.convert = self.convert_E
else:
self.convert = self.convert_noop
@classmethod
def isRegion(cls, char):
return Ch.space <= ord(char[0]) < Ch.delete
def _convert(self, text):
c = ord(text[0])
if Ch.space <= c <= Ch.at_mark:
return Convert_Tables.alpha_table_1[(c - Ch.space)]
elif Ch.alphabet_A <= c <= Ch.alphabet_Z:
return chr(Ch.zenkaku_A + c - Ch.alphabet_A) # u\0041A => u\ff21A
elif Ch.square_bra <= c <= Ch.back_quote:
return Convert_Tables.alpha_table_2[(c - Ch.square_bra)]
elif Ch.alphabet_a <= c <= Ch.alphabet_z:
return chr(Ch.zenkaku_a + c - Ch.alphabet_a) # u\0061a => u\ff41a
elif Ch.bracket_bra <= c <= Ch.tilda:
return Convert_Tables.alpha_table_3[(c - Ch.bracket_bra)]
else:
return "" # pragma: no cover
def convert_E(self, text):
t = self._convert(text)
if len(t):
return t, 1
else:
return "", 0
def convert_noop(self, text):
return text[0], 1
Back to Directory
File Manager