xxxxxxxxxx
def remove_accents(raw_text):
    """Replace common accented Latin characters with ASCII equivalents.

    The goal is to brute-force login mechanisms, primarily against
    English-language systems, where user accounts tend to be created
    without accented characters. This function swaps those characters
    for letters of the standard English alphabet.

    Both lowercase and uppercase accented letters are handled and case is
    preserved (``'É'`` becomes ``'E'``). Only the precomposed characters
    listed in the table below are replaced; every other character is
    returned unchanged.

    :param raw_text: Text that may contain accented characters.
    :type raw_text: str
    :return: ``raw_text`` with the listed characters replaced.
    :rtype: str
    """
    # (accented characters, ASCII replacement) pairs. Uppercase variants are
    # listed explicitly so that case survives the substitution.
    groups = (
        (u"àáâãäå", u"a"),
        (u"ÀÁÂÃÄÅ", u"A"),
        (u"èéêë", u"e"),
        (u"ÈÉÊË", u"E"),
        (u"ìíîï", u"i"),
        (u"ÌÍÎÏ", u"I"),
        (u"òóôõö", u"o"),
        (u"ÒÓÔÕÖ", u"O"),
        (u"ùúûü", u"u"),
        (u"ÙÚÛÜ", u"U"),
        (u"ýÿ", u"y"),
        (u"ÝŸ", u"Y"),
        (u"ß", u"ss"),
        (u"ñ", u"n"),
        (u"Ñ", u"N"),
    )

    # Flatten the groups into a per-character lookup table.
    replacement_for = {}
    for accented, plain in groups:
        replacement_for.update(dict.fromkeys(accented, plain))

    # One character class covering every mapped character lets a single
    # regex pass do the work of the former eight sequential substitutions.
    pattern = u"[" + u"".join(replacement_for) + u"]"
    return re.sub(
        pattern,
        lambda match: replacement_for[match.group(0)],
        raw_text,
    )
xxxxxxxxxx
def simplify(text):
    """Reduce *text* to plain ASCII by dropping accents and non-ASCII characters.

    The text is decomposed with Unicode NFD normalization, so an accented
    letter becomes its base letter followed by combining marks. Encoding to
    ASCII with ``errors='ignore'`` then discards those marks, along with
    every other character that has no ASCII representation (for example
    ``'ß'`` is removed entirely rather than transliterated).

    :param text: Text to simplify. Byte strings are decoded as UTF-8 first.
    :return: The ASCII-only result as a native ``str``.
    :raises UnicodeDecodeError: If a byte string is not valid UTF-8.
    """
    import unicodedata

    # Decode byte input explicitly. This covers Python 3 ``bytes`` and
    # Python 2 ``str`` alike, while already-decoded text is left untouched
    # (the former ``unicode(text, 'utf-8')`` call rejected it on Python 2,
    # and bytes were never decoded at all on Python 3).
    if isinstance(text, (bytes, bytearray)):
        text = text.decode('utf-8')

    ascii_bytes = unicodedata.normalize('NFD', text).encode('ascii', 'ignore')
    # str() is a no-op on Python 3; on Python 2 it turns the decoded
    # unicode object back into a native byte string.
    return str(ascii_bytes.decode('ascii'))
xxxxxxxxxx
# Usage example: transliterate Unicode text to ASCII with the `unidecode`
# package (not part of the standard library; it must be installed separately).
from unidecode import unidecode
# The return value is not stored here; the call only illustrates the API.
unidecode(u'ıöüç')
# Returns: 'iouc'
xxxxxxxxxx
/**
 * Strip diacritics from a string.
 *
 * The input is decomposed to Unicode NFD form, which splits each accented
 * letter into a base letter plus combining marks; the marks in the
 * U+0300..U+036F range are then removed.
 *
 * @param {string} str - Text that may contain accented characters.
 * @returns {string} The text with those combining marks removed.
 */
function toNormalForm(str) {
  const combiningMarks = /[\u0300-\u036f]/g;
  const decomposed = str.normalize("NFD");
  return decomposed.replace(combiningMarks, "");
}
xxxxxxxxxx
def convert_to_non_accent(string):
    """Strip accents from a string.

    The input is decomposed with Unicode NFKD normalization and every
    character with a non-zero combining class is dropped, leaving the base
    characters. Because NFKD is a compatibility decomposition, compatibility
    characters are expanded as well (for example the ligature ``'ﬁ'``
    becomes ``'fi'``).

    :param string: String to be converted.
    :type string: str
    :return: The string without combining characters.
    :rtype: str
    """
    decomposed = unicodedata.normalize('NFKD', string)
    # unicodedata.combining() returns 0 for characters that are not
    # combining marks; only those are kept.
    base_chars = [
        char for char in decomposed if unicodedata.combining(char) == 0
    ]
    return ''.join(base_chars)