magiclinks.py 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445
  1. #-*- coding: utf-8 -*-
  2. import re
  3. import markdown
  4. from markdown.inlinepatterns import LinkPattern
  5. from markdown.postprocessors import RawHtmlPostprocessor
  6. from markdown.util import etree
  # Global vars
  #
  # MAGICLINKS_RE matches bare URLs in running text. It is case-insensitive
  # ((?i)) and compiled with re.UNICODE. A match must begin, at a word
  # boundary, with one of:
  #   * a "scheme:" prefix followed by 1-3 slashes or an alphanumeric/"%"
  #   * "www", optionally followed by up to three digits, then a dot
  #   * something shaped like "host.tld/" (tld of 2-4 letters)
  # It then consumes runs of characters that are not whitespace, parentheses
  # or angle brackets, plus balanced parenthesised groups (one nested level),
  # and must end either with such a group or with a character that is not
  # whitespace, common punctuation, a bracket or a quote -- so trailing
  # punctuation after a URL is left out of the match.
  MAGICLINKS_RE = re.compile(r'(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))', re.UNICODE)
  class MagicLinksExtension(markdown.Extension):
      """Markdown extension that installs ``MagicLinksTreeprocessor``."""

      def extendMarkdown(self, md, md_globals=None):
          """Register this extension and its tree processor on ``md``.

          Args:
              md: The ``Markdown`` instance being configured.
              md_globals: Accepted and ignored. Python-Markdown 2.x passes
                  its module globals as a second positional argument when
                  it registers an extension; keeping the parameter optional
                  lets the method be called with either one or two
                  arguments.
          """
          md.registerExtension(self)
          # '_end' places the processor after every already-registered tree
          # processor, so it sees the tree once the others have run.
          md.treeprocessors.add(
              'mi_magiclinks',
              MagicLinksTreeprocessor(md),
              '_end',
          )
  class MagicLinksTreeprocessor(markdown.treeprocessors.Treeprocessor):
      """Tree processor that turns bare URLs in text into ``<a>`` links.

      Every ``MAGICLINKS_RE`` match found in element text or tail is replaced
      by the placeholder returned from ``self.markdown.htmlStash.store`` for
      an ``<a href="...">`` tag built from the matched URL.
      """

      # Elements whose own text and descendants are never linkified.
      SKIPPED_TAGS = ('code', 'pre', 'a', 'img')

      def run(self, root):
          """Linkify the tree under ``root`` in place.

          Returns whatever ``walk_tree`` returns, which is ``None``.
          """
          return self.walk_tree(root)

      def walk_tree(self, node):
          """Recursively linkify ``node`` and its descendants in place.

          The text and children of elements listed in ``SKIPPED_TAGS`` are
          left alone. The tail of a visited node is always processed: tail
          text follows the element's closing tag and therefore belongs to
          the parent's content, not to the skipped element itself.
          """
          if node.tag not in self.SKIPPED_TAGS:
              node.text = self._linkify(node.text)
              for child in node:
                  self.walk_tree(child)
          node.tail = self._linkify(node.tail)

      def _linkify(self, text):
          """Return ``text`` with URLs replaced; blank input is returned as is."""
          if not text or not unicode(text).strip():
              return text
          return MAGICLINKS_RE.sub(self._replace_match, unicode(text))

      def _replace_match(self, matchobj):
          """Build the replacement for a single ``MAGICLINKS_RE`` match.

          Returns the stash placeholder for the generated link, or the
          matched text unchanged when ``sanitize_url`` yields an empty URL.
          """
          helper = self._link_helper()
          href = helper.sanitize_url(
              helper.unescape(matchobj.group(0).strip()))
          if not href:
              return matchobj.group(0)
          href = self.escape(href)
          return self.markdown.htmlStash.store(
              '<a href="%(href)s">%(href)s</a>' % {'href': href}, safe=True)

      def _link_helper(self):
          """Return a cached ``LinkPattern`` used for URL clean-up helpers."""
          # Only sanitize_url()/unescape() are used from this object, so one
          # instance per processor is enough instead of one per match.
          helper = getattr(self, '_magiclinks_helper', None)
          if helper is None:
              helper = LinkPattern(MAGICLINKS_RE, self.markdown)
              self._magiclinks_helper = helper
          return helper

      def escape(self, html):
          """Escape ``&``, ``<``, ``>`` and ``"`` for use in HTML attributes.

          ``&`` is replaced first so the entities produced by the later
          replacements are not escaped a second time.
          """
          html = html.replace('&', '&amp;')
          html = html.replace('<', '&lt;')
          html = html.replace('>', '&gt;')
          return html.replace('"', '&quot;')