Spaces:
Runtime error
Runtime error
"""Insert spaces, mypython/split_chinese.py.""" | |
import re | |
def insert_spaces(text: str, method: int = None) -> str:
    r"""Insert spaces so that every Chinese character stands on its own.

    Runs of ASCII letters/digits are kept together; each Chinese character
    (the range 一-龟) is separated from its neighbours by a space.  The
    strategies differ in speed and in the leading/trailing whitespace they
    produce, so callers that need a canonical form should ``.strip()`` the
    result, as the examples below do.

    Args:
        text: string mixing Latin and Chinese characters.
        method: which strategy to apply.
            None (default): choose automatically -- method 3 when ASCII
                letters and spaces make up more than half of ``text``,
                otherwise method 0.
            0: put a space around every character, then remove the single
                spaces that ended up between two ASCII letters/digits.  The
                result always starts and ends with a space.
            1: append a space after every Chinese character and after every
                run of characters that are neither spaces nor Chinese.
            2: append a space after every Chinese character, every run of
                digits, and every run of other (non-Chinese) word characters.
            3: insert a space at every position adjacent to a character that
                is not an ASCII letter or digit.
            Any other value behaves like 0.

    Returns:
        The text with spaces inserted.

    >>> insert_spaces("test亨利it四世上").strip()
    'test 亨 利 it 四 世 上'
    >>> insert_spaces("test亨利it四世上").strip().__len__()
    17
    >>> insert_spaces("test亨利it", 1)
    'test 亨 利 it '
    """
    if method is None:
        # Count Latin *characters*, not the number of matched runs: a text
        # such as "hello world" is a single run but is entirely Latin.
        latin_chars = sum(map(len, re.findall(r"[a-zA-Z ]+", text)))
        method = 3 if latin_chars > len(text) // 2 else 0

    if method == 1:
        return re.sub(r"[一-龟]|[^ 一-龟]+", r"\g<0> ", text)

    if method == 2:
        # ``\w`` also matches CJK ideographs, so the word-run alternative
        # must exclude the Chinese range; otherwise a run that starts with a
        # Latin letter would swallow the Chinese characters following it.
        return re.sub(r"[一-龟]|\d+|[^\W一-龟]+", r"\g<0> ", text)

    if method == 3:
        # Zero-width matches: only positions next to a non-alphanumeric
        # character receive a space, so pure Latin runs are never touched.
        return re.sub(r"(?<=[^a-zA-Z\d])|(?=[^a-zA-Z\d])", " ", text)

    # Method 0 and any unrecognised value: ``str.replace("", " ")`` puts a
    # space between all characters (and at both ends); the regex then glues
    # adjacent ASCII letters/digits back together.
    spaced_out = text.replace("", " ")
    return re.sub(r"(?<=[a-zA-Z\d]) (?=[a-zA-Z\d])", "", spaced_out)