# parser.py (listed size: 3.6 KB)
  1. from importlib import import_module
  2. from bs4 import BeautifulSoup
  3. from django.conf import settings
  4. import markdown
  5. from misago.markup.bbcode import inline, blocks
  6. __all__ = ['parse_text']
  7. def parse_text(text, author=None, allow_mentions=True, allow_links=True,
  8. allow_images=True, allow_blocks=True):
  9. """
  10. Message parser
  11. Utility for flavours to call
  12. Breaks text into paragraphs, supports code, spoiler and quote blocks,
  13. headers, lists, images, spoilers, text styles
  14. Returns dict object
  15. """
  16. md = md_factory(author=author, allow_mentions=allow_mentions,
  17. allow_links=allow_links, allow_images=allow_images,
  18. allow_blocks=allow_blocks)
  19. parsing_result = {
  20. 'original_text': text,
  21. 'parsed_text': '',
  22. 'markdown': md,
  23. }
  24. # Parse text
  25. parsed_text = md.convert(text)
  26. # Clean and store parsed text
  27. parsing_result['parsed_text'] = parsed_text.strip()
  28. parsing_result = pipeline.process_result(parsing_result)
  29. return parsing_result
  30. def md_factory(author=None, allow_mentions=True, allow_links=True,
  31. allow_images=True, allow_blocks=True):
  32. """
  33. Create and configure markdown object
  34. """
  35. md = markdown.Markdown(safe_mode='escape',
  36. extensions=['nl2br'])
  37. # Remove references
  38. del md.preprocessors['reference']
  39. del md.inlinePatterns['reference']
  40. del md.inlinePatterns['image_reference']
  41. del md.inlinePatterns['short_reference']
  42. # Add [b], [i], [u]
  43. md.inlinePatterns.add('bb_b', inline.bold, '<strong')
  44. md.inlinePatterns.add('bb_i', inline.italics, '<emphasis')
  45. md.inlinePatterns.add('bb_u', inline.underline, '<emphasis2')
  46. if allow_mentions:
  47. # Register mentions
  48. pass
  49. if allow_links:
  50. # Add [url]
  51. pass
  52. else:
  53. # Remove links
  54. del md.inlinePatterns['link']
  55. del md.inlinePatterns['autolink']
  56. del md.inlinePatterns['automail']
  57. if allow_images:
  58. # Add [img]
  59. pass
  60. else:
  61. # Remove images
  62. del md.inlinePatterns['image_link']
  63. if allow_blocks:
  64. # Add [hr] [quote], [spoiler], [list] and [code] blocks
  65. md.parser.blockprocessors.add('bb_hr',
  66. blocks.BBCodeHRProcessor(md.parser),
  67. '>hr')
  68. else:
  69. # Remove blocks
  70. del md.parser.blockprocessors['hashheader']
  71. del md.parser.blockprocessors['setextheader']
  72. del md.parser.blockprocessors['code']
  73. del md.parser.blockprocessors['quote']
  74. del md.parser.blockprocessors['hr']
  75. del md.parser.blockprocessors['olist']
  76. del md.parser.blockprocessors['ulist']
  77. return pipeline.extend_markdown(md)
  78. class MarkupPipeline(object):
  79. """
  80. Small framework for extending parser
  81. """
  82. def extend_markdown(self, md):
  83. for extension in settings.MISAGO_MARKUP_EXTENSIONS:
  84. module = import_module(extension)
  85. if hasattr(module, 'extend_markdown'):
  86. hook = getattr(module, 'extend_markdown')
  87. hook.extend_markdown(md)
  88. return md
  89. def process_result(self, result):
  90. soup = BeautifulSoup(result['parsed_text'])
  91. for extension in settings.MISAGO_MARKUP_EXTENSIONS:
  92. module = import_module(extension)
  93. if hasattr(module, 'clean_parsed'):
  94. hook = getattr(module, 'clean_parsed')
  95. hook.process_result(result, soup)
  96. souped_text = unicode(soup.body).strip()[6:-7]
  97. result['parsed_text'] = souped_text
  98. return result
  99. pipeline = MarkupPipeline()