parser.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. import bleach
  2. from bs4 import BeautifulSoup
  3. from htmlmin.minify import html_minify
  4. import markdown
  5. from misago.markup.bbcode import inline, blocks
  6. from misago.markup.pipeline import pipeline
  7. __all__ = ['parse']
  8. def parse(text, request, poster, allow_mentions=True, allow_links=True,
  9. allow_images=True, allow_blocks=True, minify=True):
  10. """
  11. Message parser
  12. Utility for flavours to call
  13. Breaks text into paragraphs, supports code, spoiler and quote blocks,
  14. headers, lists, images, spoilers, text styles
  15. Returns dict object
  16. """
  17. md = md_factory(allow_links=allow_links, allow_images=allow_images,
  18. allow_blocks=allow_blocks)
  19. parsing_result = {
  20. 'original_text': text,
  21. 'parsed_text': '',
  22. 'markdown': md,
  23. 'mentions': [],
  24. 'images': [],
  25. 'outgoing_links': [],
  26. 'inside_links': []
  27. }
  28. # Parse text
  29. parsed_text = md.convert(text)
  30. # Clean and store parsed text
  31. parsing_result['parsed_text'] = parsed_text.strip()
  32. if allow_links:
  33. linkify_paragraphs(parsing_result)
  34. if allow_links or allow_images:
  35. make_absolute_links_relative(parsing_result, request)
  36. parsing_result = pipeline.process_result(parsing_result)
  37. if minify:
  38. minify_result(parsing_result)
  39. return parsing_result
  40. def linkify_paragraphs(result):
  41. result['parsed_text'] = bleach.linkify(
  42. result['parsed_text'], skip_pre=True, parse_email=True)
  43. def make_absolute_links_relative(result, request):
  44. pass
  45. def minify_result(result):
  46. # [25:-14] trims <html><head></head><body> and </body></html>
  47. result['parsed_text'] = html_minify(result['parsed_text'])[25:-14]
  48. def md_factory(allow_links=True, allow_images=True, allow_blocks=True):
  49. """
  50. Create and configure markdown object
  51. """
  52. md = markdown.Markdown(safe_mode='escape',
  53. extensions=['nl2br'])
  54. # Remove references
  55. del md.preprocessors['reference']
  56. del md.inlinePatterns['reference']
  57. del md.inlinePatterns['image_reference']
  58. del md.inlinePatterns['short_reference']
  59. # Add [b], [i], [u]
  60. md.inlinePatterns.add('bb_b', inline.bold, '<strong')
  61. md.inlinePatterns.add('bb_i', inline.italics, '<emphasis')
  62. md.inlinePatterns.add('bb_u', inline.underline, '<emphasis2')
  63. if allow_links:
  64. # Add [url]
  65. pass
  66. else:
  67. # Remove links
  68. del md.inlinePatterns['link']
  69. del md.inlinePatterns['autolink']
  70. del md.inlinePatterns['automail']
  71. if allow_images:
  72. # Add [img]
  73. pass
  74. else:
  75. # Remove images
  76. del md.inlinePatterns['image_link']
  77. if allow_blocks:
  78. # Add [hr] [quote], [spoiler], [list] and [code] blocks
  79. md.parser.blockprocessors.add('bb_hr',
  80. blocks.BBCodeHRProcessor(md.parser),
  81. '>hr')
  82. else:
  83. # Remove blocks
  84. del md.parser.blockprocessors['hashheader']
  85. del md.parser.blockprocessors['setextheader']
  86. del md.parser.blockprocessors['code']
  87. del md.parser.blockprocessors['quote']
  88. del md.parser.blockprocessors['hr']
  89. del md.parser.blockprocessors['olist']
  90. del md.parser.blockprocessors['ulist']
  91. return pipeline.extend_markdown(md)