#-*- coding: utf-8 -*-
import re
import markdown
from markdown.inlinepatterns import LinkPattern
from markdown.postprocessors import RawHtmlPostprocessor
from markdown.util import etree
from misago.utils.strings import html_escape
from misago.utils.urls import is_inner, clean_inner
# Module-level pattern used to locate bare URLs inside plain text.
#
# Matching is case-insensitive ((?i)) and Unicode-aware (re.UNICODE).
# A match starts at a word boundary with one of:
#   * a scheme name followed by ":" and then one to three slashes or an
#     alphanumeric/"%" character,
#   * "www" optionally followed by up to three digits and a dot,
#   * a dotted host name ending in a 2-4 letter suffix followed by "/".
# It continues with runs of characters other than whitespace, parentheses
# and angle brackets, or with parenthesised groups (parentheses may nest one
# level deep inside a group).
# It must end either with such a parenthesised group or with a character that
# is not whitespace and not one of the punctuation/quote characters listed in
# the final character class, so trailing punctuation is left out of the match.
MAGICLINKS_RE = re.compile(r'(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))', re.UNICODE)
class MagicLinksExtension(markdown.Extension):
    """Markdown extension that installs the magic-links tree processor."""

    def extendMarkdown(self, md):
        """Attach this extension to the Markdown instance ``md``.

        Registers the extension with ``md`` and adds a
        ``MagicLinksTreeprocessor`` to ``md.treeprocessors`` under the
        name ``mi_magiclinks`` at the ``_end`` location.
        """
        md.registerExtension(self)
        processor = MagicLinksTreeprocessor(md)
        md.treeprocessors.add('mi_magiclinks', processor, '_end')
def _escape_html(value):
    """Return ``value`` with HTML-significant characters turned into entities.

    The ampersand is replaced first so that the entities produced for the
    remaining characters are not escaped a second time.
    """
    for char, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;'),
                         ('"', '&quot;'), ("'", '&#39;')):
        value = value.replace(char, entity)
    return value


class MagicLinksTreeprocessor(markdown.treeprocessors.Treeprocessor):
    """Tree processor that turns bare URLs found in text into HTML links.

    Every substring matched by ``MAGICLINKS_RE`` in an element's ``text`` or
    ``tail`` is replaced by a placeholder returned from
    ``self.markdown.htmlStash.store``; the stashed value is the anchor
    markup for the URL.

    NOTE: this module uses the Python 2 ``unicode`` built-in.
    """

    # Elements whose own text and descendants are never scanned for URLs.
    SKIPPED_TAGS = ('code', 'pre', 'a', 'img')

    def run(self, root):
        """Linkify the tree rooted at ``root`` in place.

        Returns whatever ``walk_tree`` returns, which is ``None``.
        """
        return self.walk_tree(root)

    def walk_tree(self, node):
        """Linkify ``node`` and, unless its tag is skipped, its descendants.

        The element is modified in place; nothing is returned.
        """
        skipped = node.tag in self.SKIPPED_TAGS

        if not skipped and node.text and unicode(node.text).strip():
            node.text = self._linkify(node.text)

        # ``tail`` is the text that follows this element's closing tag, so it
        # belongs to the parent rather than to the element itself. It must be
        # scanned even when the element is skipped; otherwise a URL written
        # right after inline code, a link or an image is never linkified.
        # Tails inside a skipped element stay untouched because the children
        # of a skipped element are never visited.
        if node.tail and unicode(node.tail).strip():
            node.tail = self._linkify(node.tail)

        if not skipped:
            for child in node:
                self.walk_tree(child)

    def _linkify(self, text):
        """Return ``text`` with every URL match replaced by a stash placeholder."""
        return MAGICLINKS_RE.sub(self._replace_match, unicode(text))

    def _replace_match(self, matchobj):
        """Build the replacement for a single ``MAGICLINKS_RE`` match.

        Returns the placeholder for the stashed anchor, or the matched text
        unchanged when the sanitised URL is empty.
        """
        # LinkPattern is instantiated only for its unescape() and
        # sanitize_url() helpers.
        link = LinkPattern(MAGICLINKS_RE, self.markdown)
        href = link.sanitize_url(link.unescape(matchobj.group(0).strip()))
        if not href:
            return matchobj.group(0)

        if is_inner(href):
            target = clean_inner(href)
            # The label drops the first character of the cleaned URL --
            # presumably a leading "/"; verify against clean_inner.
            label = target[1:]
        else:
            target = label = href

        # The value is stashed as safe HTML, so it must be escaped here: the
        # URL pattern permits characters such as '"' and '&' inside a match,
        # which would otherwise break out of the href attribute.
        # NOTE(review): html_escape is imported by this file, but its quoting
        # behaviour is not visible here, so escaping is done explicitly.
        anchor = '<a href="%s">%s</a>' % (_escape_html(target),
                                          _escape_html(label))
        return self.markdown.htmlStash.store(anchor, safe=True)