magiclinks.py 1.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344
  1. #-*- coding: utf-8 -*-
  2. import re
  3. import markdown
  4. from markdown.inlinepatterns import LinkPattern
  5. from markdown.postprocessors import RawHtmlPostprocessor
  6. from markdown.util import etree
  7. from misago.utils.strings import html_escape
  8. from misago.utils.urls import is_inner, clean_inner
  9. # Global vars
  10. MAGICLINKS_RE = re.compile(r'(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))', re.UNICODE)
  11. class MagicLinksExtension(markdown.Extension):
  12. def extendMarkdown(self, md):
  13. md.registerExtension(self)
  14. md.treeprocessors.add('mi_magiclinks',
  15. MagicLinksTreeprocessor(md),
  16. '_end')
  17. class MagicLinksTreeprocessor(markdown.treeprocessors.Treeprocessor):
  18. def run(self, root):
  19. return self.walk_tree(root)
  20. def walk_tree(self, node):
  21. def parse_link(matchobj):
  22. link = LinkPattern(MAGICLINKS_RE, self.markdown)
  23. href = link.sanitize_url(link.unescape(matchobj.group(0).strip()))
  24. if href:
  25. if is_inner(href):
  26. clean = clean_inner(href)
  27. return self.markdown.htmlStash.store('<a href="%s">%s</a>' % (clean, clean[1:]), safe=True)
  28. else:
  29. return self.markdown.htmlStash.store('<a href="%(href)s" rel="nofollow">%(href)s</a>' % {'href': href}, safe=True)
  30. else:
  31. return matchobj.group(0)
  32. if node.tag not in ['code', 'pre', 'a', 'img']:
  33. if node.text and unicode(node.text).strip():
  34. node.text = MAGICLINKS_RE.sub(parse_link, unicode(node.text))
  35. if node.tail and unicode(node.tail).strip():
  36. node.tail = MAGICLINKS_RE.sub(parse_link, unicode(node.tail))
  37. for i in node:
  38. self.walk_tree(i)