factory.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. import re
  2. import markdown
  3. from HTMLParser import HTMLParser
  4. from django.conf import settings
  5. from django.utils.importlib import import_module
  6. from django.utils.translation import ugettext_lazy as _
  7. from misago.utils.strings import random_string
  8. class ClearHTMLParser(HTMLParser):
  9. def __init__(self):
  10. HTMLParser.__init__(self)
  11. self.clean_text = ''
  12. self.lookback = []
  13. def handle_entityref(self, name):
  14. if name == 'gt':
  15. self.clean_text += '>'
  16. if name == 'lt':
  17. self.clean_text += '<'
  18. def handle_starttag(self, tag, attrs):
  19. self.lookback.append(tag)
  20. def handle_endtag(self, tag):
  21. try:
  22. if self.lookback[-1] == tag:
  23. self.lookback.pop()
  24. except IndexError:
  25. pass
  26. def handle_data(self, data):
  27. # String does not repeat itself
  28. if self.clean_text[-len(data):] != data:
  29. # String is not "QUOTE"
  30. try:
  31. if self.lookback[-1] in ('strong', 'em'):
  32. self.clean_text += data
  33. elif not (data == 'Quote' and self.lookback[-1] == 'h3' and self.lookback[-2] == 'blockquote'):
  34. self.clean_text += data
  35. except IndexError:
  36. self.clean_text += data
  37. def clear_markdown(text):
  38. parser = ClearHTMLParser()
  39. parser.feed(text)
  40. return parser.clean_text
  41. def remove_unsupported(md):
  42. # References are evil, we dont support them
  43. del md.preprocessors['reference']
  44. del md.inlinePatterns['reference']
  45. del md.inlinePatterns['image_reference']
  46. del md.inlinePatterns['short_reference']
  47. def signature_markdown(acl, text):
  48. md = markdown.Markdown(
  49. safe_mode='escape',
  50. output_format=settings.OUTPUT_FORMAT,
  51. extensions=['nl2br'])
  52. remove_unsupported(md)
  53. if not acl.usercp.allow_signature_links():
  54. del md.inlinePatterns['link']
  55. del md.inlinePatterns['autolink']
  56. if not acl.usercp.allow_signature_images():
  57. del md.inlinePatterns['image_link']
  58. del md.parser.blockprocessors['hashheader']
  59. del md.parser.blockprocessors['setextheader']
  60. del md.parser.blockprocessors['code']
  61. del md.parser.blockprocessors['quote']
  62. del md.parser.blockprocessors['hr']
  63. del md.parser.blockprocessors['olist']
  64. del md.parser.blockprocessors['ulist']
  65. return md.convert(text)
  66. def post_markdown(request, text):
  67. md = markdown.Markdown(
  68. safe_mode='escape',
  69. output_format=settings.OUTPUT_FORMAT,
  70. extensions=['nl2br', 'fenced_code'])
  71. remove_unsupported(md)
  72. md.mi_token = random_string(16)
  73. for extension in settings.MARKDOWN_EXTENSIONS:
  74. module = '.'.join(extension.split('.')[:-1])
  75. extension = extension.split('.')[-1]
  76. module = import_module(module)
  77. attr = getattr(module, extension)
  78. ext = attr()
  79. ext.extendMarkdown(md)
  80. text = md.convert(text)
  81. return tidy_markdown(md, text)
  82. def tidy_markdown(md, text):
  83. text = text.replace('<p><h3><quotetitle>', '<h3><quotetitle>')
  84. text = text.replace('</quotetitle></h3></p>', '</quotetitle></h3>')
  85. text = text.replace('</quotetitle></h3><br>\r\n', '</quotetitle></h3>\r\n<p>')
  86. text = text.replace('\r\n<p></p>', '')
  87. return md, text
  88. def finalize_markdown(text):
  89. def trans_quotetitle(match):
  90. return _("Posted by %(user)s") % {'user': match.group('content')}
  91. text = re.sub(r'<quotetitle>(?P<content>.+)</quotetitle>', trans_quotetitle, text)
  92. text = re.sub(r'<quotesingletitle>', _("Quote"), text)
  93. return text