import re
# Names exported by ``from <module> import *``: the text-transformation
# functions defined below.
__all__ = ('preprocess', 'remove_formatting', 'replace_emotes',
'simplify_links', 'expand_links', 'links_to_markup')
# Inline formatting code: '##' followed by one digit or 'b'/'B'.
formatting_re = re.compile(r'##[0-9bB]')
# Emote code: '%%' followed by any single character other than '%'.
emotes_re = re.compile(r'%%[^%]')
# Link markup of the form '@@<first>|<second>@@'.
#   group 1: text before the '|' (cannot contain '|')
#   group 2: text between the '|' and the closing '@@' (cannot contain '@')
mplus_link_re = re.compile(r'@@([^|]+)\|([^@]+)@@')
# http/https/ftp URL that is not directly preceded by '@' or '|'.
#   group 1: the character before the URL, or '' at the start of the string
#            (no MULTILINE flag, so '^' only matches at the very beginning)
#   group 2: the whole URL, scheme included
#   group 3: the scheme ('http', 'https' or 'ftp')
#   group 4: everything after '://' up to the next space or tab
url_re = re.compile(r'(^|[^@|])((http|https|ftp)://([^\t ]+))')
def remove_formatting(text):
    """Return *text* with every inline formatting code removed.

    Each substring matched by ``formatting_re`` is replaced by the empty
    string; the rest of the text is returned unchanged.
    """
    return formatting_re.sub('', text)
def replace_emotes(text):
    """Replace ``%%<char>`` emote codes in *text* with ASCII emoticons.

    The character following ``%%`` selects an entry of the emote table: its
    code point minus 48 (``ord('0')``) is used as the index, so ``%%0`` maps
    to the first emote, ``%%1`` to the second, and so on.

    Codes whose index falls outside the table (on either side) are left in
    the text untouched.

    :param text: the string to process.
    :returns: a new string with every known emote code substituted.
    """
    emotes = (
        ":-D", ":-)", ";-)", ":-(", ":-o", ":-|", ":-/", "B-)",
        ":-D", ":-[", ":-P", "*blush*", ":'-(", "*:-]*",
        "*weird emote*", "*ninja*", ":-)", "*star*", "*?*", "*!*", "*idea*",
        "*->*", "<3", "^_^", ":-)", ";-)", ":-(", ":-O", ":-(",
        "*mimi*", ":-D", ":-D", "*perturbed*", ":-P",
        "*shame*", ":-(", ">:-D", "0_o", "*ninja*", "*bad geek*",
        "*star*", "*?*", "*!*", "*bubble*", ">_>", "*in love*",
        "*disgust*", ">:-D", ":-(", "xD", "u.u", "x_x",
        "*facepalm*", ">:-D", "*angry*", ":-D", "*metal*",
        ":'-(", "*...*", "*@:=*", ":3", "*zZzZz*", "-.-'",
        "*alien*",
    )

    def emote_repl(m):
        # The match is always three characters: '%%' plus the code character.
        code = ord(m.group(0)[2]) - 48
        # Check both bounds: code == len(emotes) would raise IndexError, and
        # a negative code (character below '0') would silently pick an emote
        # counted from the end of the table.
        if 0 <= code < len(emotes):
            return emotes[code]
        return m.group(0)

    return re.sub(emotes_re, emote_repl, text)
def simplify_links(text):
    """Collapse every ``@@<first>|<second>@@`` link in *text* to ``<second>``.

    Only the part after the ``|`` (group 2 of ``mplus_link_re``) is kept;
    the part before it and the ``@@`` delimiters are dropped.
    """
    return mplus_link_re.sub(lambda link: link.group(2), text)
def expand_links(text):
    """Wrap bare URLs found in *text* in bracketed ``@@...|...@@`` markup.

    Every match of ``url_re`` is rewritten as
    ``<prefix>[@@<url>|<remainder>@@]`` where *prefix* is the character
    captured before the URL (possibly empty), *url* is the full URL and
    *remainder* is the URL without its ``scheme://`` part.
    """
    def to_link(match):
        prefix, url, remainder = match.group(1, 2, 4)
        return prefix + '[@@' + url + '|' + remainder + '@@]'

    return url_re.sub(to_link, text)
def links_to_markup(text):
    """Rewrite ``@@<first>|<second>@@`` links in *text* as markup tags.

    Each link becomes ``[ref=<first>][color=2F3Fff]<second>[/color][/ref]``.
    """
    markup_template = r'[ref=\1][color=2F3Fff]\2[/color][/ref]'
    return mplus_link_re.sub(markup_template, text)
def preprocess(text, actions=(simplify_links,
                              remove_formatting,
                              replace_emotes)):
    """Run *text* through each callable in *actions*, in order.

    Every action receives the output of the previous one and the final
    result is returned.  By default links are simplified, formatting codes
    are removed and emote codes are replaced, in that order.
    """
    result = text
    for transform in actions:
        result = transform(result)
    return result