mirror of
https://github.com/jfilter/clean-text.git
synced 2021-09-19 22:32:58 +03:00
improve phone regex (fix #10)
This commit is contained in:
@@ -42,8 +42,9 @@ EMAIL_REGEX = re.compile(
|
||||
flags=re.IGNORECASE | re.UNICODE,
|
||||
)
|
||||
|
||||
# for more information: https://github.com/jfilter/clean-text/issues/10
#
# The pattern has two alternatives, tried in this order:
#   1. An optional prefix ("0", "1", optionally preceded by "+", or "+" and
#      two digits), an optional 3-4 digit area code (parentheses and a
#      trailing "/" allowed), a 3+4 digit number and an optional extension
#      ("ext", "ext.", "#", "x" or "-" followed by 2-6 digits). This branch is
#      anchored on both sides so it does not match inside a word or inside a
#      longer run of digits.
#   2. An optional "+", 4-5 digits, exactly one separator, then 6-9 digits.
#
# In the second alternative the hyphen is placed last in the character class
# so that it is a literal. Written as "[ .-/]" the sequence ".-/" is parsed as
# a range from "." to "/", which accepts "." and "/" but not "-" itself.
PHONE_REGEX = re.compile(
    r"((?:^|(?<=[^\w)]))(((\+?[01])|(\+\d{2}))[ .-]?)?(\(?\d{3,4}\)?/?[ .-]?)?(\d{3}[ .-]?\d{4})(\s?(?:ext\.?|[#x-])\s?\d{2,6})?(?:$|(?=\W)))|\+?\d{4,5}[ ./-]\d{6,9}"
)
|
||||
|
||||
NUMBERS_REGEX = re.compile(
|
||||
|
||||
@@ -36,10 +36,29 @@ def test_replace_emails():
|
||||
assert cleantext.replace_emails(text, "*EMAIL*") == proc_text
|
||||
|
||||
|
||||
# Sample inputs for the phone-number replacement test. Each entry is expected
# to be replaced so that no digit remains in the result.
phone_numbers = [
    # "+" prefix with space-separated groups
    "+49 123 1548690",
    # hyphen-separated groups
    "555-123-4567",
    # the same digits with and without a leading zero, a space or a slash
    "2404 9099130",
    "024049099130",
    "02404 9099130",
    "02404/9099130",
    # the same digits behind a "+49" prefix
    "+492404 9099130",
    "+4924049099130",
    "+492404/9099130",
    # four-digit code followed by nine digits
    "0160 123456789",
    "0160/123456789",
    "+32160 123456789",
    # number preceded by a textual label
    "Tel.: 0160 123456789",
]
|
||||
|
||||
|
||||
def test_replace_phone_numbers():
    """Check that phone numbers are swapped for the replacement token.

    First verifies one number embedded in a sentence against the exact
    expected output, then runs every entry of ``phone_numbers`` through the
    replacement and requires that the token is present and no digit survives.
    """
    placeholder = "*PHONE*"

    sentence = "I can be reached at 555-123-4567 through next Friday."
    expected = "I can be reached at *PHONE* through next Friday."
    assert cleantext.replace_phone_numbers(sentence, placeholder) == expected

    for number in phone_numbers:
        replaced = cleantext.replace_phone_numbers(number, placeholder)
        leftover_digit = any(ch.isdigit() for ch in replaced)
        # The failure message shows the input next to what came back.
        assert "PHONE" in replaced and not leftover_digit, number + " / " + replaced
|
||||
|
||||
|
||||
def test_replace_numbers():
|
||||
|
||||
Reference in New Issue
Block a user