magiclinks.py 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. #-*- coding: utf-8 -*-
  2. import re
  3. import markdown
  4. from markdown.inlinepatterns import LinkPattern
  5. from markdown.postprocessors import RawHtmlPostprocessor
  6. from markdown.util import etree
  7. from misago.utils.strings import html_escape
  8. from misago.utils.urls import is_inner, clean_inner, clean_outer
  9. # Global vars
  10. MAGICLINKS_RE = re.compile(r'(\<)?(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))(\>)?', re.UNICODE)
  11. class MagicLinksExtension(markdown.Extension):
  12. def extendMarkdown(self, md):
  13. md.registerExtension(self)
  14. md.treeprocessors.add('mi_magiclinks',
  15. MagicLinksTreeprocessor(md),
  16. '_end')
  17. class MagicLinksTreeprocessor(markdown.treeprocessors.Treeprocessor):
  18. def run(self, root):
  19. return self.walk_tree(root)
  20. def walk_tree(self, node):
  21. def parse_link(matchobj):
  22. matched_link = matchobj.group(0).strip()
  23. if matched_link[0] == '<':
  24. matched_link = matched_link[1:]
  25. if matched_link[-1] == '>':
  26. matched_link = matched_link[:-1]
  27. link = LinkPattern(MAGICLINKS_RE, self.markdown)
  28. href = link.sanitize_url(link.unescape(matched_link))
  29. if href:
  30. if is_inner(href):
  31. clean = clean_inner(href)
  32. return self.markdown.htmlStash.store('<a href="%s">%s</a>' % (clean, clean[1:]), safe=True)
  33. else:
  34. clean = clean_outer(href)
  35. return self.markdown.htmlStash.store('<a href="%s" rel="nofollow">%s</a>' % (clean, href), safe=True)
  36. else:
  37. return matchobj.group(0)
  38. if node.tag not in ['code', 'pre', 'a', 'img']:
  39. if node.text and unicode(node.text).strip():
  40. node.text = MAGICLINKS_RE.sub(parse_link, unicode(node.text))
  41. if node.tail and unicode(node.tail).strip():
  42. node.tail = MAGICLINKS_RE.sub(parse_link, unicode(node.tail))
  43. for i in node:
  44. self.walk_tree(i)