parser.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144
  1. import markdown
  2. from markdown.extensions.fenced_code import FencedCodeExtension
  3. from .bbcode.code import CodeBlockExtension
  4. from .bbcode.hr import BBCodeHRProcessor
  5. from .bbcode.inline import bold, image, italics, underline, url
  6. from .bbcode.quote import QuoteExtension
  7. from .bbcode.spoiler import SpoilerExtension
  8. from .htmlparser import parse_html_string, print_html_string
  9. from .links import clean_links, linkify_texts
  10. from .md.shortimgs import ShortImagesExtension
  11. from .md.strikethrough import StrikethroughExtension
  12. from .mentions import add_mentions
  13. from .pipeline import pipeline
  14. def parse(
  15. text,
  16. request,
  17. poster,
  18. allow_mentions=True,
  19. allow_links=True,
  20. allow_images=True,
  21. allow_blocks=True,
  22. force_shva=False,
  23. ):
  24. """
  25. Message parser
  26. Utility for flavours to call
  27. Breaks text into paragraphs, supports code, spoiler and quote blocks,
  28. headers, lists, images, spoilers, text styles
  29. Returns dict object
  30. """
  31. md = md_factory(
  32. allow_links=allow_links, allow_images=allow_images, allow_blocks=allow_blocks
  33. )
  34. parsing_result = {
  35. "original_text": text,
  36. "parsed_text": "",
  37. "markdown": md,
  38. "mentions": [],
  39. "images": [],
  40. "internal_links": [],
  41. "outgoing_links": [],
  42. }
  43. # Parse text
  44. parsed_text = md.convert(text)
  45. # Clean and store parsed text
  46. parsing_result["parsed_text"] = parsed_text.strip()
  47. # Run additional operations
  48. if allow_mentions or allow_links or allow_images:
  49. root_node = parse_html_string(parsing_result["parsed_text"])
  50. if allow_links:
  51. linkify_texts(root_node)
  52. if allow_mentions:
  53. add_mentions(parsing_result, root_node)
  54. if allow_links or allow_images:
  55. clean_links(request, parsing_result, root_node, force_shva)
  56. parsing_result["parsed_text"] = print_html_string(root_node)
  57. # Let plugins do their magic
  58. parsing_result = pipeline.process_result(parsing_result)
  59. return parsing_result
  60. def md_factory(allow_links=True, allow_images=True, allow_blocks=True):
  61. """creates and configures markdown object"""
  62. md = markdown.Markdown(extensions=["markdown.extensions.nl2br"])
  63. # Remove HTML allowances
  64. md.preprocessors.deregister("html_block")
  65. md.inlinePatterns.deregister("html")
  66. # Remove references
  67. md.parser.blockprocessors.deregister("reference")
  68. md.inlinePatterns.deregister("reference")
  69. md.inlinePatterns.deregister("image_reference")
  70. md.inlinePatterns.deregister("short_reference")
  71. # Add [b], [i], [u]
  72. md.inlinePatterns.register(bold, "bb_b", 55)
  73. md.inlinePatterns.register(italics, "bb_i", 55)
  74. md.inlinePatterns.register(underline, "bb_u", 55)
  75. # Add ~~deleted~~
  76. strikethrough_md = StrikethroughExtension()
  77. strikethrough_md.extendMarkdown(md)
  78. if allow_links:
  79. # Add [url]
  80. md.inlinePatterns.register(url(md), "bb_url", 155)
  81. else:
  82. # Remove links
  83. md.inlinePatterns.deregister("link")
  84. md.inlinePatterns.deregister("autolink")
  85. md.inlinePatterns.deregister("automail")
  86. if allow_images:
  87. # Add [img]
  88. md.inlinePatterns.register(image(md), "bb_img", 145)
  89. short_images_md = ShortImagesExtension()
  90. short_images_md.extendMarkdown(md)
  91. else:
  92. # Remove images
  93. md.inlinePatterns.deregister("image_link")
  94. if allow_blocks:
  95. # Add [hr] and [quote] blocks
  96. md.parser.blockprocessors.register(BBCodeHRProcessor(md.parser), "bb_hr", 45)
  97. fenced_code = FencedCodeExtension(lang_prefix="language-")
  98. fenced_code.extendMarkdown(md)
  99. code_bbcode = CodeBlockExtension()
  100. code_bbcode.extendMarkdown(md)
  101. quote_bbcode = QuoteExtension()
  102. quote_bbcode.extendMarkdown(md)
  103. spoiler_bbcode = SpoilerExtension()
  104. spoiler_bbcode.extendMarkdown(md)
  105. else:
  106. # Remove blocks
  107. md.parser.blockprocessors.deregister("hashheader")
  108. md.parser.blockprocessors.deregister("setextheader")
  109. md.parser.blockprocessors.deregister("code")
  110. md.parser.blockprocessors.deregister("quote")
  111. md.parser.blockprocessors.deregister("hr")
  112. md.parser.blockprocessors.deregister("olist")
  113. md.parser.blockprocessors.deregister("ulist")
  114. return pipeline.extend_markdown(md)