Просмотр исходного кода

Remove beautiful soup dependency (#1453)

* WIP replace beautiful soup with custom HTML util

* Format code with black

* Remove unused imports

* Remove beautifulsoup, bleach and htmlmin

* Bump version to 0.29

* Remove BS4 import

* Fix function call

* Tweak HTML parser

* Tweak parser
Rafał Pitoń 2 года назад
Родитель
Коммит
161da7d69d
33 измененных файлов с 711 добавлено и 367 удалено
  1. 1 1
      misago/__init__.py
  2. 0 5
      misago/conf/defaults.py
  3. 110 0
      misago/markup/htmlparser.py
  4. 199 0
      misago/markup/links.py
  5. 104 41
      misago/markup/mentions.py
  6. 16 130
      misago/markup/parser.py
  7. 11 7
      misago/markup/pipeline.py
  8. 6 4
      misago/markup/tests/snapshots/snap_test_code_bbcode.py
  9. 3 3
      misago/markup/tests/snapshots/snap_test_code_md.py
  10. 15 4
      misago/markup/tests/snapshots/snap_test_escaping.py
  11. 1 1
      misago/markup/tests/snapshots/snap_test_hr_bbcode.py
  12. 6 6
      misago/markup/tests/snapshots/snap_test_inline_bbcode.py
  13. 2 2
      misago/markup/tests/snapshots/snap_test_link_handling.py
  14. 22 22
      misago/markup/tests/snapshots/snap_test_quote_bbcode.py
  15. 7 3
      misago/markup/tests/snapshots/snap_test_short_image_markdown.py
  16. 8 8
      misago/markup/tests/snapshots/snap_test_spoiler_bbcode.py
  17. 5 5
      misago/markup/tests/test_code_bbcode.py
  18. 3 3
      misago/markup/tests/test_code_md.py
  19. 10 4
      misago/markup/tests/test_escaping.py
  20. 2 2
      misago/markup/tests/test_hr_bbcode.py
  21. 69 0
      misago/markup/tests/test_htmlparser.py
  22. 16 16
      misago/markup/tests/test_inline_bbcode.py
  23. 22 22
      misago/markup/tests/test_link_handling.py
  24. 52 21
      misago/markup/tests/test_mentions.py
  25. 0 17
      misago/markup/tests/test_parser.py
  26. 7 7
      misago/markup/tests/test_quote_bbcode.py
  27. 1 1
      misago/markup/tests/test_short_image_markdown.py
  28. 5 5
      misago/markup/tests/test_spoiler_bbcode.py
  29. 1 1
      misago/markup/tests/test_strikethrough_markdown.py
  30. 3 3
      misago/threads/api/postingendpoint/mentions.py
  31. 2 3
      misago/threads/tests/test_post_mentions.py
  32. 0 3
      requirements.in
  33. 2 17
      requirements.txt

+ 1 - 1
misago/__init__.py

@@ -1,5 +1,5 @@
 from .plugins.pluginlist import load_plugin_list_if_exists
 from .plugins.pluginlist import load_plugin_list_if_exists
 
 
 
 
-__version__ = "0.28.2"
+__version__ = "0.29.0"
 __released__ = True
 __released__ = True

+ 0 - 5
misago/conf/defaults.py

@@ -36,11 +36,6 @@ MISAGO_USER_DATA_DOWNLOADS_WORKING_DIR = None
 MISAGO_MARKUP_EXTENSIONS = []
 MISAGO_MARKUP_EXTENSIONS = []
 
 
 
 
-# Bleach callbacks for linkifying paragraphs
-
-MISAGO_BLEACH_CALLBACKS = []
-
-
 # Custom post validators
 # Custom post validators
 
 
 MISAGO_POST_VALIDATORS = []
 MISAGO_POST_VALIDATORS = []

+ 110 - 0
misago/markup/htmlparser.py

@@ -0,0 +1,110 @@
+import html
+from dataclasses import dataclass
+
+import html5lib
+
+# Tags that ElementNode.__str__ writes in self-closing form ("<br />"):
+# for these no children and no end tag are ever emitted.
+SINGLETON_TAGS = (
+    "area",
+    "base",
+    "br",
+    "col",
+    "command",
+    "embed",
+    "hr",
+    "img",
+    "input",
+    "keygen",
+    "link",
+    "meta",
+    "param",
+    "source",
+    "track",
+    "wbr",
+)
+
+
+class Node:
+    """Common base class for all nodes of the parsed HTML tree.
+
+    Concrete node types must override ``__str__`` to return their HTML.
+    """
+
+    def __str__(self):
+        message = "Subclasses of 'Node' need to implement __str__"
+        raise NotImplementedError(message)
+
+
+@dataclass
+class RootNode(Node):
+    """Top of a parsed tree; renders as the concatenation of its children."""
+
+    # Plain class attribute (not a dataclass field), so code that checks
+    # ``node.tag`` can handle the root the same way as an element.
+    tag = None
+    children: list
+
+    def __str__(self):
+        rendered = [str(child) for child in self.children]
+        return "".join(rendered)
+
+
+@dataclass
+class ElementNode(Node):
+    """An HTML element: tag name, attribute mapping and ordered child nodes."""
+
+    tag: str
+    attrs: dict
+    children: list
+
+    def __str__(self):
+        # Contents of the opening tag: the name, followed by a space and the
+        # attributes when there are any.
+        opening = self.tag
+        if self.attrs:
+            opening += " " + " ".join(self.attrs_str())
+
+        # Singleton elements are self-closed and their children are ignored.
+        if self.tag in SINGLETON_TAGS:
+            return f"<{opening} />"
+
+        inner = "".join(map(str, self.children))
+        return f"<{opening}>{inner}</{self.tag}>"
+
+    def attrs_str(self):
+        """Yield every attribute as escaped HTML source, in mapping order."""
+        for name, value in self.attrs.items():
+            escaped_name = html.escape(str(name))
+            if value is True or not value:
+                # ``True`` and falsy values are written as bare attributes.
+                yield escaped_name
+            else:
+                yield f'{escaped_name}="{html.escape(str(value))}"'
+
+
+@dataclass
+class TextNode(Node):
+    """A run of plain text; HTML-escaped when rendered."""
+
+    text: str
+
+    def __str__(self):
+        escaped = html.escape(self.text)
+        return escaped
+
+
+def parse_html_string(string: str) -> RootNode:
+    """Parse an HTML fragment into a tree of nodes.
+
+    The string is parsed with html5lib and the contents of the resulting
+    ``body`` element are copied under a new ``RootNode``.
+    """
+    document = html5lib.parse(string, namespaceHTMLElements=False)
+    body = document.find("body")
+
+    # Text preceding the first child element is kept on ``body.text``.
+    leading = [TextNode(text=body.text)] if body.text else []
+    root_node = RootNode(children=leading)
+
+    for element in body:
+        add_child_node(root_node, element)
+
+    return root_node
+
+
+def add_child_node(parent, element):
+    """Convert ``element`` and its descendants, appending them to ``parent``.
+
+    The element's own text becomes the first child of the new node. Its tail
+    (text that follows the element) is appended to ``parent`` right after
+    the node.
+    """
+    children = []
+    if element.text:
+        children.append(TextNode(text=element.text))
+
+    node = ElementNode(tag=element.tag, attrs=element.attrib, children=children)
+
+    # Descendants only ever touch ``node``, so they can be converted before
+    # the node itself is attached to its parent.
+    for sub_element in element:
+        add_child_node(node, sub_element)
+
+    parent.children.append(node)
+    if element.tail:
+        parent.children.append(TextNode(text=element.tail))
+
+
+def print_html_string(root_node: RootNode) -> str:
+    """Serialize a node tree back into an HTML string."""
+    html_string = str(root_node)
+    return html_string

+ 199 - 0
misago/markup/links.py

@@ -0,0 +1,199 @@
+import re
+from typing import Union
+
+from django.http import Http404
+from django.urls import resolve
+
+from .htmlparser import ElementNode, RootNode, TextNode
+
+# Namespaced URL names for which clean_attachment_link() adds or removes
+# the "?shva=1" suffix.
+MISAGO_ATTACHMENT_VIEWS = ("misago:attachment", "misago:attachment-thumbnail")
+# Pattern used to find bare URLs in text and to test if a link label is a URL.
+URL_RE = re.compile(
+    r"(https?://)?"  # optional http/https scheme
+    r"(www\.)?"  # optional "www." prefix
+    r"(\w+((-|_)\w+)?\.)?"  # optional subdomain label
+    r"\w+((_|-|\w)+)?(\.[a-z][a-z]+)"  # domain name and a TLD of 2+ letters
+    r"(:[1-9][0-9]+)?"  # optional port: 2+ digits, no leading zero
+    r"([^\s<>\[\]\(\);:]+)?"  # optional rest, up to whitespace or <>[]();:
+)
+
+
+def linkify_texts(node: Union[RootNode, ElementNode]):
+    """Turn bare URLs in text nodes under ``node`` into ``<a>`` elements.
+
+    Works in place and recurses into child elements. Contents of ``pre``,
+    ``code`` and ``a`` elements are left untouched.
+    """
+    if node.tag in ("pre", "code", "a"):
+        return
+
+    linkified = []
+    for child in node.children:
+        if not isinstance(child, TextNode):
+            linkified.append(child)
+            linkify_texts(child)
+        elif URL_RE.search(child.text):
+            linkified.extend(replace_links_in_text(child.text))
+        else:
+            linkified.append(child)
+
+    node.children = linkified
+
+
+def replace_links_in_text(text: str) -> list:
+    """Split ``text`` into text nodes and an ``<a>`` element for each URL."""
+    nodes = []
+    cursor = 0
+
+    for match in URL_RE.finditer(text):
+        start, end = match.span()
+
+        # Plain text between the previous link (or string start) and this one
+        if start > cursor:
+            nodes.append(TextNode(text=text[cursor:start]))
+
+        url = match.group(0)
+        link_text = TextNode(text=strip_link_protocol(url))
+        nodes.append(ElementNode(tag="a", attrs={"href": url}, children=[link_text]))
+        cursor = end
+
+    # Whatever remains after the last link
+    if cursor < len(text):
+        nodes.append(TextNode(text=text[cursor:]))
+
+    return nodes
+
+
+def clean_links(
+    request,
+    result,
+    node: Union[RootNode, ElementNode, TextNode],
+    force_shva=False,
+):
+    """Walk the tree under ``node`` and clean every ``<a>`` and ``<img>``.
+
+    Links are passed to ``clean_link_node`` and images to
+    ``clean_image_node``; all elements except images are searched further.
+    """
+    if isinstance(node, TextNode):
+        return
+
+    elements = (child for child in node.children if isinstance(child, ElementNode))
+    for element in elements:
+        if element.tag == "img":
+            clean_image_node(request, result, element, force_shva)
+            continue
+
+        if element.tag == "a":
+            clean_link_node(request, result, element, force_shva)
+
+        # Links, like all other elements, may contain nested links or images
+        clean_links(request, result, element, force_shva)
+
+
+def clean_link_node(
+    request,
+    result: dict,
+    node: ElementNode,
+    force_shva: bool,
+):
+    """Normalize an ``<a>`` element in place and record its target.
+
+    Internal links are reduced by ``clean_internal_link`` and appended to
+    ``result["internal_links"]``. All other links are appended (without
+    protocol) to ``result["outgoing_links"]``, get an explicit protocol and
+    ``rel="external nofollow noopener"``. Every link gets ``target="_blank"``.
+    """
+    host = request.get_host()
+    href = node.attrs.get("href") or "/"
+
+    if is_internal_link(href, host):
+        href = clean_internal_link(href, host)
+        result["internal_links"].append(href)
+        href = clean_attachment_link(href, force_shva)
+    else:
+        result["outgoing_links"].append(strip_link_protocol(href))
+        href = assert_link_prefix(href)
+        node.attrs["rel"] = "external nofollow noopener"
+
+    node.attrs["target"] = "_blank"
+    node.attrs["href"] = href
+
+    if not node.children:
+        # Empty link: use the URL as its label. This has to be a TextNode,
+        # a bare str child would be written out without HTML escaping.
+        node.children.append(TextNode(text=strip_link_protocol(href)))
+    elif len(node.children) == 1 and isinstance(node.children[0], TextNode):
+        # A label that is itself an URL is displayed without the protocol
+        text = node.children[0].text
+        if URL_RE.match(text):
+            node.children[0].text = strip_link_protocol(text)
+
+
+def clean_image_node(
+    request,
+    result: dict,
+    node: ElementNode,
+    force_shva: bool,
+):
+    """Normalize an ``<img>`` element in place and record its source.
+
+    The source is appended to ``result["images"]``: reduced by
+    ``clean_internal_link`` for internal images, without protocol otherwise.
+    External sources get an explicit protocol in the ``src`` attribute.
+    """
+    host = request.get_host()
+    src = node.attrs.get("src") or "/"
+
+    # An image without the alt attribute is left without one instead of
+    # failing with KeyError.
+    alt = node.attrs.get("alt")
+    if alt:
+        node.attrs["alt"] = strip_link_protocol(alt)
+
+    if is_internal_link(src, host):
+        src = clean_internal_link(src, host)
+        result["images"].append(src)
+        src = clean_attachment_link(src, force_shva)
+    else:
+        result["images"].append(strip_link_protocol(src))
+        src = assert_link_prefix(src)
+
+    node.attrs["src"] = src
+
+
+def is_internal_link(link, host):
+    """Tell if ``link`` points to ``host``.
+
+    Links starting with a single slash are always internal. Other links are
+    compared with ``host`` case-insensitively, ignoring the protocol and
+    a leading "www." on either side.
+    """
+    if link.startswith("/") and not link.startswith("//"):
+        return True
+
+    link = strip_link_protocol(link).lower()
+    host = host.lower()
+
+    # The prefix is removed by slicing: str.lstrip("www.") strips any run of
+    # "w" and "." characters, which made "wexample.com" match "example.com".
+    if link.startswith("www."):
+        link = link[4:]
+    if host.startswith("www."):
+        host = host[4:]
+
+    return link.startswith(host)
+
+
+def strip_link_protocol(link):
+    """Remove a leading ``https:``/``http:`` and ``//`` from ``link``."""
+    # Schemes are tested in this order, each once, ignoring letter case
+    for scheme in ("https:", "http:"):
+        if link.lower().startswith(scheme):
+            link = link[len(scheme) :]
+
+    return link[2:] if link.startswith("//") else link
+
+
+def assert_link_prefix(link):
+    """Return ``link`` with an explicit protocol, ``http`` when it has none."""
+    if link.lower().startswith(("https:", "http:")):
+        return link
+
+    # Protocol-relative links already carry the double slash
+    template = "http:%s" if link.startswith("//") else "http://%s"
+    return template % link
+
+
+def clean_internal_link(link, host):
+    """Strip protocol, a leading "www." and ``host`` from an internal link.
+
+    Returns "/" when nothing is left.
+    """
+    path = strip_link_protocol(link)
+
+    if path.lower().startswith("www."):
+        path = path[4:]
+    domain = host[4:] if host.lower().startswith("www.") else host
+
+    if path.lower().startswith(domain):
+        path = path[len(domain) :]
+
+    return path if path else "/"
+
+
+def clean_attachment_link(link, force_shva=False):
+    """Add or remove the "?shva=1" suffix on links to attachment views.
+
+    Links that don't resolve, resolve without a namespace or resolve to
+    a view not listed in ``MISAGO_ATTACHMENT_VIEWS`` are returned unchanged.
+    """
+    try:
+        match = resolve(link)
+        if not match.namespaces:
+            return link
+        url_name = ":".join(match.namespaces + [match.url_name])
+    except (Http404, ValueError):
+        return link
+
+    if url_name not in MISAGO_ATTACHMENT_VIEWS:
+        return link
+
+    if force_shva:
+        return "%s?shva=1" % link
+    if link.endswith("?shva=1"):
+        return link[:-7]
+    return link

+ 104 - 41
misago/markup/mentions.py

@@ -1,65 +1,128 @@
 import re
 import re
+from typing import Union
 
 
-from bs4 import BeautifulSoup
 from django.contrib.auth import get_user_model
 from django.contrib.auth import get_user_model
 
 
-SUPPORTED_TAGS = ("h1", "h2", "h3", "h4", "h5", "h6", "div", "p")
+from .htmlparser import (
+    ElementNode,
+    RootNode,
+    TextNode,
+)
+
+EXCLUDE_ELEMENTS = ("pre", "code", "a")
 USERNAME_RE = re.compile(r"@[0-9a-z]+", re.IGNORECASE)
 USERNAME_RE = re.compile(r"@[0-9a-z]+", re.IGNORECASE)
-MENTIONS_LIMIT = 24
+MENTIONS_LIMIT = 32
 
 
 
 
-def add_mentions(request, result):
+def add_mentions(result, root_node):
+    """Link user mentions found in the tree under ``root_node``.
+
+    Ids of the mentioned users are stored in ``result["mentions"]``. Nothing
+    is changed when no existing user is mentioned or when more than
+    ``MENTIONS_LIMIT`` distinct names are found.
+    """
     if "@" not in result["parsed_text"]:
     if "@" not in result["parsed_text"]:
         return
         return
 
 
-    mentions_dict = {}
+    mentions = set()
+    nodes = []
+
+    find_mentions(root_node, mentions, nodes)
+
+    if not mentions or len(mentions) > MENTIONS_LIMIT:
+        return  # No need to run mentions logic
 
 
-    soup = BeautifulSoup(result["parsed_text"], "html5lib")
+    users_data = get_users_data(mentions)
+    if not users_data:
+        return  # Mentioned users don't exist
 
 
-    elements = []
-    for tagname in SUPPORTED_TAGS:
-        if tagname in result["parsed_text"]:
-            elements += soup.find_all(tagname)
-    for element in elements:
-        add_mentions_to_element(request, element, mentions_dict)
+    for node in nodes:
+        add_mentions_to_node(node, users_data)
 
 
-    result["parsed_text"] = str(soup.body)[6:-7].strip()
-    result["mentions"] = list(filter(bool, mentions_dict.values()))
+    # users_data values are (user_id, username) tuples
+    result["mentions"] = [user[0] for user in users_data.values()]
 
 
 
 
-def add_mentions_to_element(request, element, mentions_dict):
-    for item in element.contents:
-        if item.name:
-            if item.name != "a":
-                add_mentions_to_element(request, item, mentions_dict)
-        elif "@" in item.string:
-            parse_string(request, item, mentions_dict)
+def find_mentions(
+    node: Union[ElementNode, RootNode],
+    mentions: set,
+    nodes: list,
+):
+    """Collect mentioned names and the nodes whose texts contain them.
+
+    Names found in text nodes under ``node`` are added to ``mentions``.
+    Each node with at least one such text child is appended once to
+    ``nodes``. Contents of elements listed in ``EXCLUDE_ELEMENTS`` are
+    skipped.
+    """
+    if isinstance(node, ElementNode) and node.tag in EXCLUDE_ELEMENTS:
+        return
+
+    tracked_node = False
+    for child in node.children:
+        if isinstance(child, TextNode):
+            results = find_mentions_in_str(child.text)
+            if results:
+                mentions.update(results)
+                if not tracked_node:
+                    tracked_node = True
+                    nodes.append(node)
+        else:
+            find_mentions(child, mentions, nodes)
 
 
 
 
-def parse_string(request, element, mentions_dict):
+def find_mentions_in_str(text: str):
+    """Return the set of lowercased names mentioned in ``text``, or ``None``."""
+    # Every match starts with the "@" sign, which is not part of the name
+    found = {match[1:].lower() for match in USERNAME_RE.findall(text)}
+    return found or None
+
+
+def get_users_data(mentions):
+    """Map slugs of existing users from ``mentions`` to ``(id, username)``."""
     User = get_user_model()
     User = get_user_model()
+    rows = User.objects.filter(slug__in=mentions).values_list(
+        "id", "username", "slug"
+    )
+
+    return {slug: (user_id, username) for user_id, username, slug in rows}
+
+
+def add_mentions_to_node(node, users_data):
+    """Replace mentions in text nodes directly under ``node`` with links."""
+    updated = []
+    for child in node.children:
+        if not isinstance(child, TextNode):
+            updated.append(child)
+            continue
+
+        updated.extend(add_mentions_to_text(child.text, users_data))
+
+    node.children = updated
+
+
+def add_mentions_to_text(text: str, users_data):
+    """Split ``text`` into text nodes and links to the mentioned users.
+
+    ``users_data`` maps user slugs to ``(id, username)`` tuples. Mentions
+    of names missing from it are kept as plain text.
+    """
+    nodes = []
 
 
-    def replace_mentions(matchobj):
-        if len(mentions_dict) >= MENTIONS_LIMIT:
-            return matchobj.group(0)
+    while True:
+        match = USERNAME_RE.search(text)
+        if not match:
+            if text:
+                nodes.append(TextNode(text=text))
+            return nodes
 
 
-        username = matchobj.group(0)[1:].strip().lower()
+        start, end = match.span()
+        user_slug = text[start + 1 : end].lower()
 
 
-        if username not in mentions_dict:
-            if username == request.user.slug:
-                mentions_dict[username] = request.user
-            else:
-                try:
-                    mentions_dict[username] = User.objects.get(slug=username)
-                except User.DoesNotExist:
-                    mentions_dict[username] = None
+        # Append text between 0 and start to nodes
+        if start > 0:
+            nodes.append(TextNode(text=text[:start]))
 
 
-        if mentions_dict[username]:
-            user = mentions_dict[username]
-            return '<a href="%s">@%s</a>' % (user.get_absolute_url(), user.username)
+        # Append match string to nodes and keep scanning. Only the match
+        # itself is appended: text before it was already added above.
+        if user_slug not in users_data:
+            nodes.append(TextNode(text=text[start:end]))
+            text = text[end:]
+            continue
 
 
-        # we've failed to resolve user for username
-        return matchobj.group(0)
+        user_id, username = users_data[user_slug]
+        nodes.append(
+            ElementNode(
+                tag="a",
+                attrs={
+                    "href": f"/u/{user_slug}/{user_id}/",
+                },
+                children=[TextNode(text=f"@{username}")],
+            )
+        )
 
 
-    replaced_string = USERNAME_RE.sub(replace_mentions, element.string)
-    element.replace_with(BeautifulSoup(replaced_string, "html.parser"))
+        text = text[end:]

+ 16 - 130
misago/markup/parser.py

@@ -1,24 +1,18 @@
-import bleach
 import markdown
 import markdown
-from bs4 import BeautifulSoup
-from django.http import Http404
-from django.urls import resolve
-from htmlmin.minify import html_minify
 from markdown.extensions.fenced_code import FencedCodeExtension
 from markdown.extensions.fenced_code import FencedCodeExtension
 
 
-from ..conf import settings
 from .bbcode.code import CodeBlockExtension
 from .bbcode.code import CodeBlockExtension
 from .bbcode.hr import BBCodeHRProcessor
 from .bbcode.hr import BBCodeHRProcessor
 from .bbcode.inline import bold, image, italics, underline, url
 from .bbcode.inline import bold, image, italics, underline, url
 from .bbcode.quote import QuoteExtension
 from .bbcode.quote import QuoteExtension
 from .bbcode.spoiler import SpoilerExtension
 from .bbcode.spoiler import SpoilerExtension
+from .htmlparser import parse_html_string, print_html_string
+from .links import clean_links, linkify_texts
 from .md.shortimgs import ShortImagesExtension
 from .md.shortimgs import ShortImagesExtension
 from .md.strikethrough import StrikethroughExtension
 from .md.strikethrough import StrikethroughExtension
 from .mentions import add_mentions
 from .mentions import add_mentions
 from .pipeline import pipeline
 from .pipeline import pipeline
 
 
-MISAGO_ATTACHMENT_VIEWS = ("misago:attachment", "misago:attachment-thumbnail")
-
 
 
 def parse(
 def parse(
     text,
     text,
@@ -29,7 +23,6 @@ def parse(
     allow_images=True,
     allow_images=True,
     allow_blocks=True,
     allow_blocks=True,
     force_shva=False,
     force_shva=False,
-    minify=True,
 ):
 ):
     """
     """
     Message parser
     Message parser
@@ -61,19 +54,24 @@ def parse(
     # Clean and store parsed text
     # Clean and store parsed text
     parsing_result["parsed_text"] = parsed_text.strip()
     parsing_result["parsed_text"] = parsed_text.strip()
 
 
-    if allow_links:
-        linkify_paragraphs(parsing_result)
+    # Run additional operations
+    if allow_mentions or allow_links or allow_images:
+        root_node = parse_html_string(parsing_result["parsed_text"])
 
 
-    parsing_result = pipeline.process_result(parsing_result)
+        if allow_links:
+            linkify_texts(root_node)
 
 
-    if allow_mentions:
-        add_mentions(request, parsing_result)
+        if allow_mentions:
+            add_mentions(parsing_result, root_node)
 
 
-    if allow_links or allow_images:
-        clean_links(request, parsing_result, force_shva)
+        if allow_links or allow_images:
+            clean_links(request, parsing_result, root_node, force_shva)
+
+        parsing_result["parsed_text"] = print_html_string(root_node)
+
+    # Let plugins do their magic
+    parsing_result = pipeline.process_result(parsing_result)
 
 
-    if minify:
-        minify_result(parsing_result)
     return parsing_result
     return parsing_result
 
 
 
 
@@ -144,115 +142,3 @@ def md_factory(allow_links=True, allow_images=True, allow_blocks=True):
         md.parser.blockprocessors.deregister("ulist")
         md.parser.blockprocessors.deregister("ulist")
 
 
     return pipeline.extend_markdown(md)
     return pipeline.extend_markdown(md)
-
-
-def linkify_paragraphs(result):
-    result["parsed_text"] = bleach.linkify(
-        result["parsed_text"],
-        callbacks=settings.MISAGO_BLEACH_CALLBACKS,
-        skip_tags=["a", "code", "pre"],
-        parse_email=True,
-    )
-
-
-def clean_links(request, result, force_shva=False):
-    host = request.get_host()
-
-    soup = BeautifulSoup(result["parsed_text"], "html5lib")
-    for link in soup.find_all("a"):
-        if is_internal_link(link["href"], host):
-            link["href"] = clean_internal_link(link["href"], host)
-            result["internal_links"].append(link["href"])
-            link["href"] = clean_attachment_link(link["href"], force_shva)
-        else:
-            result["outgoing_links"].append(clean_link_prefix(link["href"]))
-            link["href"] = assert_link_prefix(link["href"])
-            link["rel"] = "external nofollow noopener"
-
-        link["target"] = "_blank"
-
-        if link.string:
-            link.string = clean_link_prefix(link.string)
-
-    for img in soup.find_all("img"):
-        img["alt"] = clean_link_prefix(img["alt"])
-        if is_internal_link(img["src"], host):
-            img["src"] = clean_internal_link(img["src"], host)
-            result["images"].append(img["src"])
-            img["src"] = clean_attachment_link(img["src"], force_shva)
-        else:
-            result["images"].append(clean_link_prefix(img["src"]))
-            img["src"] = assert_link_prefix(img["src"])
-
-    # [6:-7] trims <body></body> wrap
-    result["parsed_text"] = str(soup.body)[6:-7]
-
-
-def is_internal_link(link, host):
-    if link.startswith("/") and not link.startswith("//"):
-        return True
-
-    link = clean_link_prefix(link).lstrip("www.").lower()
-    return link.lower().startswith(host.lstrip("www."))
-
-
-def clean_link_prefix(link):
-    if link.lower().startswith("https:"):
-        link = link[6:]
-    if link.lower().startswith("http:"):
-        link = link[5:]
-    if link.startswith("//"):
-        link = link[2:]
-    return link
-
-
-def assert_link_prefix(link):
-    if link.lower().startswith("https:"):
-        return link
-    if link.lower().startswith("http:"):
-        return link
-    if link.startswith("//"):
-        return "http:%s" % link
-
-    return "http://%s" % link
-
-
-def clean_internal_link(link, host):
-    link = clean_link_prefix(link)
-
-    if link.lower().startswith("www."):
-        link = link[4:]
-    if host.lower().startswith("www."):
-        host = host[4:]
-
-    if link.lower().startswith(host):
-        link = link[len(host) :]
-
-    return link or "/"
-
-
-def clean_attachment_link(link, force_shva=False):
-    try:
-        resolution = resolve(link)
-        if not resolution.namespaces:
-            return link
-        url_name = ":".join(resolution.namespaces + [resolution.url_name])
-    except (Http404, ValueError):
-        return link
-
-    if url_name in MISAGO_ATTACHMENT_VIEWS:
-        if force_shva:
-            link = "%s?shva=1" % link
-        elif link.endswith("?shva=1"):
-            link = link[:-7]
-    return link
-
-
-def minify_result(result):
-    result["parsed_text"] = html_minify(result["parsed_text"])
-    result["parsed_text"] = strip_html_head_body(result["parsed_text"])
-
-
-def strip_html_head_body(parsed_text):
-    # [25:-14] trims <html><head></head><body> and </body></html>
-    return parsed_text[25:-14]

+ 11 - 7
misago/markup/pipeline.py

@@ -1,9 +1,8 @@
 from importlib import import_module
 from importlib import import_module
 
 
-from bs4 import BeautifulSoup
-
 from .. import hooks
 from .. import hooks
 from ..conf import settings
 from ..conf import settings
+from .htmlparser import parse_html_string, print_html_string
 
 
 
 
 class MarkupPipeline:
 class MarkupPipeline:
@@ -22,18 +21,23 @@ class MarkupPipeline:
         return md
         return md
 
 
     def process_result(self, result):
     def process_result(self, result):
+        """Run markup extensions and hook processors on the parsing result.
+
+        Returns ``result`` untouched when no extensions or processors are
+        configured. Otherwise ``result["parsed_text"]`` is parsed into
+        a node tree, passed to each of them together with ``result`` and
+        serialized back afterwards.
+        """
-        soup = BeautifulSoup(result["parsed_text"], "html5lib")
+        if (
+            not settings.MISAGO_MARKUP_EXTENSIONS
+            and not hooks.parsing_result_processors
+        ):
+            return result
+
+        html_tree = parse_html_string(result["parsed_text"])
         for extension in settings.MISAGO_MARKUP_EXTENSIONS:
         for extension in settings.MISAGO_MARKUP_EXTENSIONS:
             module = import_module(extension)
             module = import_module(extension)
             if hasattr(module, "clean_parsed"):
             if hasattr(module, "clean_parsed"):
                 hook = getattr(module, "clean_parsed")
                 hook = getattr(module, "clean_parsed")
-                hook.process_result(result, soup)
+                hook.process_result(result, html_tree)
 
 
         for extension in hooks.parsing_result_processors:
         for extension in hooks.parsing_result_processors:
-            extension(result, soup)
+            extension(result, html_tree)
 
 
-        souped_text = str(soup.body).strip()[6:-7]
-        result["parsed_text"] = souped_text.strip()
+        result["parsed_text"] = print_html_string(html_tree)
         return result
         return result
 
 
 
 

+ 6 - 4
misago/markup/tests/snapshots/snap_test_code_bbcode.py

@@ -13,16 +13,18 @@ snapshots[
 
 
 snapshots[
 snapshots[
     "test_code_with_language_parameter 1"
     "test_code_with_language_parameter 1"
-] = '<pre><code class="php">echo("Hello!");</code></pre>'
+] = '<pre><code class="php">echo(&quot;Hello!&quot;);</code></pre>'
 
 
 snapshots[
 snapshots[
     "test_code_with_quoted_language_parameter 1"
     "test_code_with_quoted_language_parameter 1"
-] = '<pre><code class="php">echo("Hello!");</code></pre>'
+] = '<pre><code class="php">echo(&quot;Hello!&quot;);</code></pre>'
 
 
 snapshots[
 snapshots[
     "test_multi_line_code 1"
     "test_multi_line_code 1"
 ] = """<pre><code>&lt;script&gt;
 ] = """<pre><code>&lt;script&gt;
-alert("!")
+alert(&quot;!&quot;)
 &lt;/script&gt;</code></pre>"""
 &lt;/script&gt;</code></pre>"""
 
 
-snapshots["test_single_line_code 1"] = '<pre><code>echo("Hello!");</code></pre>'
+snapshots[
+    "test_single_line_code 1"
+] = "<pre><code>echo(&quot;Hello!&quot;);</code></pre>"

+ 3 - 3
misago/markup/tests/snapshots/snap_test_code_md.py

@@ -10,17 +10,17 @@ snapshots = Snapshot()
 snapshots[
 snapshots[
     "test_multi_line_code_markdown 1"
     "test_multi_line_code_markdown 1"
 ] = """<pre><code>&lt;script&gt;
 ] = """<pre><code>&lt;script&gt;
-alert("!")
+alert(&quot;!&quot;)
 &lt;/script&gt;
 &lt;/script&gt;
 </code></pre>"""
 </code></pre>"""
 
 
 snapshots[
 snapshots[
     "test_multi_line_code_markdown_with_language 1"
     "test_multi_line_code_markdown_with_language 1"
 ] = """<pre><code class="javascript">&lt;script&gt;
 ] = """<pre><code class="javascript">&lt;script&gt;
-alert("!")
+alert(&quot;!&quot;)
 &lt;/script&gt;
 &lt;/script&gt;
 </code></pre>"""
 </code></pre>"""
 
 
 snapshots[
 snapshots[
     "test_single_line_code_markdown 1"
     "test_single_line_code_markdown 1"
-] = '<p><code>&lt;script&gt;alert("!")&lt;/script&gt;</code></p>'
+] = "<p><code>&lt;script&gt;alert(&quot;!&quot;)&lt;/script&gt;</code></p>"

+ 15 - 4
misago/markup/tests/snapshots/snap_test_escaping.py

@@ -8,22 +8,33 @@ from snapshottest import Snapshot
 snapshots = Snapshot()
 snapshots = Snapshot()
 
 
 snapshots[
 snapshots[
+    "test_code_in_quote_bbcode_header_is_escaped 1"
+] = """<aside class="quote-block">
+<div class="quote-heading">@Us&quot;&gt;&lt;script&gt;alert(&quot;!&quot;)&lt;/script&gt;er</div>
+<blockquote class="quote-body">
+<p>Test</p>
+</blockquote>
+</aside>"""
+
+snapshots[
     "test_code_in_quote_bbcode_is_escaped 1"
     "test_code_in_quote_bbcode_is_escaped 1"
 ] = """<aside class="quote-block">
 ] = """<aside class="quote-block">
 <div class="quote-heading"></div>
 <div class="quote-heading"></div>
 <blockquote class="quote-body">
 <blockquote class="quote-body">
-<p>&lt;script&gt;alert("!")&lt;/script&gt;</p>
+<p>&lt;script&gt;alert(&quot;!&quot;)&lt;/script&gt;</p>
 </blockquote>
 </blockquote>
 </aside>"""
 </aside>"""
 
 
 snapshots[
 snapshots[
     "test_code_in_quote_markdown_is_escaped 1"
     "test_code_in_quote_markdown_is_escaped 1"
 ] = """<blockquote>
 ] = """<blockquote>
-<p>&lt;script&gt;alert("!")&lt;/script&gt;</p>
+<p>&lt;script&gt;alert(&quot;!&quot;)&lt;/script&gt;</p>
 </blockquote>"""
 </blockquote>"""
 
 
 snapshots[
 snapshots[
     "test_inline_code_is_escaped 1"
     "test_inline_code_is_escaped 1"
-] = '<p><code>&lt;script&gt;alert("!")&lt;/script&gt;</code></p>'
+] = "<p><code>&lt;script&gt;alert(&quot;!&quot;)&lt;/script&gt;</code></p>"
 
 
-snapshots["test_text_is_escaped 1"] = '<p>&lt;script&gt;alert("!")&lt;/script&gt;</p>'
+snapshots[
+    "test_text_is_escaped 1"
+] = "<p>&lt;script&gt;alert(&quot;!&quot;)&lt;/script&gt;</p>"

+ 1 - 1
misago/markup/tests/snapshots/snap_test_hr_bbcode.py

@@ -10,5 +10,5 @@ snapshots = Snapshot()
 snapshots[
 snapshots[
     "test_hr_bbcode_is_replaced_if_its_alone_in_paragraph 1"
     "test_hr_bbcode_is_replaced_if_its_alone_in_paragraph 1"
 ] = """<p>Lorem ipsum dolor met.</p>
 ] = """<p>Lorem ipsum dolor met.</p>
-<hr/>
+<hr />
 <p>Sit amet elit.</p>"""
 <p>Sit amet elit.</p>"""

+ 6 - 6
misago/markup/tests/snapshots/snap_test_inline_bbcode.py

@@ -11,15 +11,15 @@ snapshots["test_bold_bbcode 1"] = "<p>Lorem <b>ipsum</b>!</p>"
 
 
 snapshots[
 snapshots[
     "test_image_bbcode 1"
     "test_image_bbcode 1"
-] = '<p>Lorem <img alt="placekitten.com/g/1200/500" src="https://placekitten.com/g/1200/500"/> ipsum</p>'
+] = '<p>Lorem <img alt="placekitten.com/g/1200/500" src="https://placekitten.com/g/1200/500" /> ipsum</p>'
 
 
 snapshots[
 snapshots[
     "test_image_bbcode_is_case_insensitive 1"
     "test_image_bbcode_is_case_insensitive 1"
-] = '<p>Lorem <img alt="placekitten.com/g/1200/500" src="https://placekitten.com/g/1200/500"/> ipsum</p>'
+] = '<p>Lorem <img alt="placekitten.com/g/1200/500" src="https://placekitten.com/g/1200/500" /> ipsum</p>'
 
 
 snapshots[
 snapshots[
     "test_image_bbcode_is_escaped 1"
     "test_image_bbcode_is_escaped 1"
-] = "<p>Lorem <img alt='&lt;script language=\"application/javascript\"&gt;' src='http://&lt;script language=\"application/javascript\"&gt;'/> ipsum</p>"
+] = '<p>Lorem <img alt="&lt;script language=&quot;application/javascript&quot;&gt;" src="http://&lt;script language=&quot;application/javascript&quot;&gt;" /> ipsum</p>'
 
 
 snapshots["test_inline_bbcode_can_be_mixed 1"] = "<p>Lorem <b><u>ipsum</u></b>!</p>"
 snapshots["test_inline_bbcode_can_be_mixed 1"] = "<p>Lorem <b><u>ipsum</u></b>!</p>"
 
 
@@ -31,7 +31,7 @@ snapshots["test_italics_bbcode 1"] = "<p>Lorem <i>ipsum</i>!</p>"
 
 
 snapshots[
 snapshots[
     "test_simple_inline_bbcode_is_escaped 1"
     "test_simple_inline_bbcode_is_escaped 1"
-] = '<p>Lorem <b>ips &lt;script language="application/javascript"&gt; um</b>!</p>'
+] = "<p>Lorem <b>ips &lt;script language=&quot;application/javascript&quot;&gt; um</b>!</p>"
 
 
 snapshots["test_underline_bbcode 1"] = "<p>Lorem <u>ipsum</u>!</p>"
 snapshots["test_underline_bbcode 1"] = "<p>Lorem <u>ipsum</u>!</p>"
 
 
@@ -41,11 +41,11 @@ snapshots[
 
 
 snapshots[
 snapshots[
     "test_url_bbcode_is_escaped 1"
     "test_url_bbcode_is_escaped 1"
-] = '<p>Lorem <a href=\'http://&lt;script language="application/javascript"&gt;\' rel="external nofollow noopener" target="_blank">&lt;script language="application/javascript"&gt;</a> ipsum</p>'
+] = '<p>Lorem <a href="http://&lt;script language=&quot;application/javascript&quot;&gt;" rel="external nofollow noopener" target="_blank">&lt;script language=&quot;application/javascript&quot;&gt;</a> ipsum</p>'
 
 
 snapshots[
 snapshots[
     "test_url_bbcode_link_text_is_escaped 1"
     "test_url_bbcode_link_text_is_escaped 1"
-] = '<p>Lorem <a href=\'http://&lt;script language="application/javascript"&gt;\' rel="external nofollow noopener" target="_blank">&lt;script language="application/javascript"&gt;</a> ipsum</p>'
+] = '<p>Lorem <a href="http://&lt;script language=&quot;application/javascript&quot;&gt;" rel="external nofollow noopener" target="_blank">&lt;script language=&quot;application/javascript&quot;&gt;</a> ipsum</p>'
 
 
 snapshots[
 snapshots[
     "test_url_bbcode_with_link_text 1"
     "test_url_bbcode_with_link_text 1"

+ 2 - 2
misago/markup/tests/snapshots/snap_test_link_handling.py

@@ -21,7 +21,7 @@ snapshots[
 
 
 snapshots[
 snapshots[
     "test_local_image_is_changed_to_relative_link 1"
     "test_local_image_is_changed_to_relative_link 1"
-] = '<p>clean_links step cleans <img alt="example.com/media/img.png" src="/media/img.png"/></p>'
+] = '<p>clean_links step cleans <img alt="example.com/media/img.png" src="/media/img.png" /></p>'
 
 
 snapshots[
 snapshots[
     "test_parser_converts_unmarked_links_to_hrefs 1"
     "test_parser_converts_unmarked_links_to_hrefs 1"
@@ -33,7 +33,7 @@ snapshots[
 
 
 snapshots[
 snapshots[
     "test_parser_skips_links_in_inline_code_bbcode 1"
     "test_parser_skips_links_in_inline_code_bbcode 1"
-] = """<p>Lorem ipsum <br/>
+] = """<p>Lorem ipsum <br />
 </p><pre><code>http://test.com</code></pre><p></p>"""
 </p><pre><code>http://test.com</code></pre><p></p>"""
 
 
 snapshots[
 snapshots[

+ 22 - 22
misago/markup/tests/snapshots/snap_test_quote_bbcode.py

@@ -8,73 +8,73 @@ from snapshottest import Snapshot
 snapshots = Snapshot()
 snapshots = Snapshot()
 
 
 snapshots[
 snapshots[
-    "test_single_line_quote 1"
+    "test_multi_line_quote 1"
 ] = """<aside class="quote-block">
 ] = """<aside class="quote-block">
 <div class="quote-heading"></div>
 <div class="quote-heading"></div>
 <blockquote class="quote-body">
 <blockquote class="quote-body">
 <p>Sit amet elit.</p>
 <p>Sit amet elit.</p>
+<p>Another line.</p>
 </blockquote>
 </blockquote>
 </aside>"""
 </aside>"""
 
 
 snapshots[
 snapshots[
-    "test_single_line_authored_quote 1"
+    "test_quote_can_contain_bbcode_or_markdown 1"
 ] = """<aside class="quote-block">
 ] = """<aside class="quote-block">
-<div class="quote-heading">@Bob</div>
+<div class="quote-heading"></div>
 <blockquote class="quote-body">
 <blockquote class="quote-body">
-<p>Sit amet elit.</p>
+<p>Sit <strong>amet</strong> <u>elit</u>.</p>
 </blockquote>
 </blockquote>
 </aside>"""
 </aside>"""
 
 
 snapshots[
 snapshots[
-    "test_single_line_authored_quote_without_quotations 1"
+    "test_quotes_can_be_nested 1"
 ] = """<aside class="quote-block">
 ] = """<aside class="quote-block">
-<div class="quote-heading">@Bob</div>
+<div class="quote-heading"></div>
 <blockquote class="quote-body">
 <blockquote class="quote-body">
 <p>Sit amet elit.</p>
 <p>Sit amet elit.</p>
+<aside class="quote-block">
+<div class="quote-heading"></div>
+<blockquote class="quote-body">
+<p>Nested quote</p>
+</blockquote>
+</aside>
 </blockquote>
 </blockquote>
 </aside>"""
 </aside>"""
 
 
 snapshots[
 snapshots[
-    "test_quote_can_contain_bbcode_or_markdown 1"
+    "test_quotes_can_contain_hr_markdown 1"
 ] = """<aside class="quote-block">
 ] = """<aside class="quote-block">
 <div class="quote-heading"></div>
 <div class="quote-heading"></div>
 <blockquote class="quote-body">
 <blockquote class="quote-body">
-<p>Sit <strong>amet</strong> <u>elit</u>.</p>
+<p>Sit amet elit.</p>
+<hr />
+<p>Another line.</p>
 </blockquote>
 </blockquote>
 </aside>"""
 </aside>"""
 
 
 snapshots[
 snapshots[
-    "test_multi_line_quote 1"
+    "test_single_line_authored_quote 1"
 ] = """<aside class="quote-block">
 ] = """<aside class="quote-block">
-<div class="quote-heading"></div>
+<div class="quote-heading">@Bob</div>
 <blockquote class="quote-body">
 <blockquote class="quote-body">
 <p>Sit amet elit.</p>
 <p>Sit amet elit.</p>
-<p>Another line.</p>
 </blockquote>
 </blockquote>
 </aside>"""
 </aside>"""
 
 
 snapshots[
 snapshots[
-    "test_quotes_can_be_nested 1"
+    "test_single_line_authored_quote_without_quotations 1"
 ] = """<aside class="quote-block">
 ] = """<aside class="quote-block">
-<div class="quote-heading"></div>
+<div class="quote-heading">@Bob</div>
 <blockquote class="quote-body">
 <blockquote class="quote-body">
 <p>Sit amet elit.</p>
 <p>Sit amet elit.</p>
-<aside class="quote-block">
-<div class="quote-heading"></div>
-<blockquote class="quote-body">
-<p>Nested quote</p>
-</blockquote>
-</aside>
 </blockquote>
 </blockquote>
 </aside>"""
 </aside>"""
 
 
 snapshots[
 snapshots[
-    "test_quotes_can_contain_hr_markdown 1"
+    "test_single_line_quote 1"
 ] = """<aside class="quote-block">
 ] = """<aside class="quote-block">
 <div class="quote-heading"></div>
 <div class="quote-heading"></div>
 <blockquote class="quote-body">
 <blockquote class="quote-body">
 <p>Sit amet elit.</p>
 <p>Sit amet elit.</p>
-<hr/>
-<p>Another line.</p>
 </blockquote>
 </blockquote>
 </aside>"""
 </aside>"""

+ 7 - 3
misago/markup/tests/snapshots/snap_test_short_image_markdown.py

@@ -9,14 +9,18 @@ snapshots = Snapshot()
 
 
 snapshots[
 snapshots[
     "test_short_image_markdown[base] 1"
     "test_short_image_markdown[base] 1"
-] = '<p><img alt="somewhere.com/image.jpg" src="http://somewhere.com/image.jpg"/></p>'
-snapshots["test_short_image_markdown[space-one-word] 1"] = "<p>! (space)</p>"
+] = '<p><img alt="somewhere.com/image.jpg" src="http://somewhere.com/image.jpg" /></p>'
+
 snapshots[
 snapshots[
     "test_short_image_markdown[space-multiple-words] 1"
     "test_short_image_markdown[space-multiple-words] 1"
 ] = "<p>! (space with other words)</p>"
 ] = "<p>! (space with other words)</p>"
+
+snapshots["test_short_image_markdown[space-one-word] 1"] = "<p>! (space)</p>"
+
 snapshots[
 snapshots[
     "test_short_image_markdown[text-before-mark] 1"
     "test_short_image_markdown[text-before-mark] 1"
-] = '<p>Text before exclamation mark<img alt="somewhere.com/image.jpg" src="http://somewhere.com/image.jpg"/></p>'
+] = '<p>Text before exclamation mark<img alt="somewhere.com/image.jpg" src="http://somewhere.com/image.jpg" /></p>'
+
 snapshots[
 snapshots[
     "test_short_image_markdown[text-before-with-space] 1"
     "test_short_image_markdown[text-before-with-space] 1"
 ] = "<p>Text before with space in between! (sometext)</p>"
 ] = "<p>Text before with space in between! (sometext)</p>"

+ 8 - 8
misago/markup/tests/snapshots/snap_test_spoiler_bbcode.py

@@ -8,29 +8,29 @@ from snapshottest import Snapshot
 snapshots = Snapshot()
 snapshots = Snapshot()
 
 
 snapshots[
 snapshots[
-    "test_single_line_spoiler 1"
+    "test_multi_line_spoiler 1"
 ] = """<aside class="spoiler-block">
 ] = """<aside class="spoiler-block">
 <blockquote class="spoiler-body">
 <blockquote class="spoiler-body">
-<p>Daenerys and Jon live happily ever after!</p>
+<p>Sit amet elit.</p>
+<p>Another line.</p>
 </blockquote>
 </blockquote>
 <div class="spoiler-overlay"><button class="spoiler-reveal" type="button"></button></div>
 <div class="spoiler-overlay"><button class="spoiler-reveal" type="button"></button></div>
 </aside>"""
 </aside>"""
 
 
 snapshots[
 snapshots[
-    "test_spoiler_can_contain_bbcode_or_markdown 1"
+    "test_single_line_spoiler 1"
 ] = """<aside class="spoiler-block">
 ] = """<aside class="spoiler-block">
 <blockquote class="spoiler-body">
 <blockquote class="spoiler-body">
-<p>Sit <strong>amet</strong> <u>elit</u>.</p>
+<p>Daenerys and Jon live happily ever after!</p>
 </blockquote>
 </blockquote>
 <div class="spoiler-overlay"><button class="spoiler-reveal" type="button"></button></div>
 <div class="spoiler-overlay"><button class="spoiler-reveal" type="button"></button></div>
 </aside>"""
 </aside>"""
 
 
 snapshots[
 snapshots[
-    "test_multi_line_spoiler 1"
+    "test_spoiler_can_contain_bbcode_or_markdown 1"
 ] = """<aside class="spoiler-block">
 ] = """<aside class="spoiler-block">
 <blockquote class="spoiler-body">
 <blockquote class="spoiler-body">
-<p>Sit amet elit.</p>
-<p>Another line.</p>
+<p>Sit <strong>amet</strong> <u>elit</u>.</p>
 </blockquote>
 </blockquote>
 <div class="spoiler-overlay"><button class="spoiler-reveal" type="button"></button></div>
 <div class="spoiler-overlay"><button class="spoiler-reveal" type="button"></button></div>
 </aside>"""
 </aside>"""
@@ -55,7 +55,7 @@ snapshots[
 ] = """<aside class="spoiler-block">
 ] = """<aside class="spoiler-block">
 <blockquote class="spoiler-body">
 <blockquote class="spoiler-body">
 <p>Sit amet elit.</p>
 <p>Sit amet elit.</p>
-<hr/>
+<hr />
 <p>Another line.</p>
 <p>Another line.</p>
 </blockquote>
 </blockquote>
 <div class="spoiler-overlay"><button class="spoiler-reveal" type="button"></button></div>
 <div class="spoiler-overlay"><button class="spoiler-reveal" type="button"></button></div>

+ 5 - 5
misago/markup/tests/test_code_bbcode.py

@@ -3,7 +3,7 @@ from ..parser import parse
 
 
 def test_single_line_code(request_mock, user, snapshot):
 def test_single_line_code(request_mock, user, snapshot):
     text = '[code]echo("Hello!");[/code]'
     text = '[code]echo("Hello!");[/code]'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
@@ -15,23 +15,23 @@ alert("!")
 </script>
 </script>
 [/code]
 [/code]
     """
     """
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_code_with_language_parameter(request_mock, user, snapshot):
 def test_code_with_language_parameter(request_mock, user, snapshot):
     text = '[code=php]echo("Hello!");[/code]'
     text = '[code=php]echo("Hello!");[/code]'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_code_with_quoted_language_parameter(request_mock, user, snapshot):
 def test_code_with_quoted_language_parameter(request_mock, user, snapshot):
     text = '[code="php"]echo("Hello!");[/code]'
     text = '[code="php"]echo("Hello!");[/code]'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_code_block_disables_parsing(request_mock, user, snapshot):
 def test_code_block_disables_parsing(request_mock, user, snapshot):
     text = "[code]Dolor [b]met.[/b][/code]"
     text = "[code]Dolor [b]met.[/b][/code]"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])

+ 3 - 3
misago/markup/tests/test_code_md.py

@@ -3,7 +3,7 @@ from ..parser import parse
 
 
 def test_single_line_code_markdown(request_mock, user, snapshot):
 def test_single_line_code_markdown(request_mock, user, snapshot):
     text = '```<script>alert("!")</script>```'
     text = '```<script>alert("!")</script>```'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
@@ -15,7 +15,7 @@ alert("!")
 </script>
 </script>
 ```
 ```
     """
     """
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
@@ -27,5 +27,5 @@ alert("!")
 </script>
 </script>
 ```
 ```
     """
     """
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])

+ 10 - 4
misago/markup/tests/test_escaping.py

@@ -4,23 +4,29 @@ from ..parser import parse
 
 
 def test_text_is_escaped(request_mock, user, snapshot):
 def test_text_is_escaped(request_mock, user, snapshot):
     text = '<script>alert("!")</script>'
     text = '<script>alert("!")</script>'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_inline_code_is_escaped(request_mock, user, snapshot):
 def test_inline_code_is_escaped(request_mock, user, snapshot):
     text = '`<script>alert("!")</script>`'
     text = '`<script>alert("!")</script>`'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_code_in_quote_markdown_is_escaped(request_mock, user, snapshot):
 def test_code_in_quote_markdown_is_escaped(request_mock, user, snapshot):
     text = '> <script>alert("!")</script>'
     text = '> <script>alert("!")</script>'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_code_in_quote_bbcode_is_escaped(request_mock, user, snapshot):
 def test_code_in_quote_bbcode_is_escaped(request_mock, user, snapshot):
     text = '[quote]<script>alert("!")</script>[/quote]'
     text = '[quote]<script>alert("!")</script>[/quote]'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
+    snapshot.assert_match(result["parsed_text"])
+
+
+def test_code_in_quote_bbcode_header_is_escaped(request_mock, user, snapshot):
+    text = '[quote="@Us"><script>alert("!")</script>er"]Test[/quote]'
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])

+ 2 - 2
misago/markup/tests/test_hr_bbcode.py

@@ -7,11 +7,11 @@ Lorem ipsum dolor met.
 [hr]
 [hr]
 Sit amet elit.
 Sit amet elit.
 """
 """
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_hr_bbcode_is_skipped_if_its_part_of_paragraph(request_mock, user, snapshot):
 def test_hr_bbcode_is_skipped_if_its_part_of_paragraph(request_mock, user, snapshot):
     text = "Lorem ipsum[hr]dolor met."
     text = "Lorem ipsum[hr]dolor met."
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert result["parsed_text"] == "<p>Lorem ipsum[hr]dolor met.</p>"
     assert result["parsed_text"] == "<p>Lorem ipsum[hr]dolor met.</p>"

+ 69 - 0
misago/markup/tests/test_htmlparser.py

@@ -0,0 +1,69 @@
+from ..htmlparser import parse_html_string, print_html_string
+
+
+def test_parser_handles_simple_html():
+    root_node = parse_html_string("<p>Hello World!</p>")
+    assert print_html_string(root_node) == "<p>Hello World!</p>"
+
+
+def test_parser_handles_html_with_brs():
+    root_node = parse_html_string("<p>Hello<br />World!</p>")
+    assert print_html_string(root_node) == "<p>Hello<br />World!</p>"
+
+
+def test_parser_handles_html_with_hrs():
+    root_node = parse_html_string("<p>Hello</p><hr /><p>World!</p>")
+    assert print_html_string(root_node) == "<p>Hello</p><hr /><p>World!</p>"
+
+
+def test_parser_escapes_html_in_text_nodes():
+    root_node = parse_html_string("<span>Hello &lt;br&gt; World!</span>")
+    assert print_html_string(root_node) == "<span>Hello &lt;br&gt; World!</span>"
+
+
+def test_parser_escapes_quotes_in_text_nodes():
+    root_node = parse_html_string('<span>Hello "World"!</span>')
+    assert print_html_string(root_node) == "<span>Hello &quot;World&quot;!</span>"
+
+
+def test_parser_handles_attributes():
+    root_node = parse_html_string('<a href="/hello-world/">Hello World!</a>')
+    assert print_html_string(root_node) == '<a href="/hello-world/">Hello World!</a>'
+
+
+def test_parser_escapes_html_in_attributes_names():
+    root_node = parse_html_string('<span data-a<tt>r="<br>">Hello!</span>')
+    assert print_html_string(root_node) == (
+        "<span data-a&lt;tt>r=&quot;<br />&quot;&gt;Hello!</span>"
+    )
+
+
+def test_parser_escapes_quotes_in_attributes_names():
+    root_node = parse_html_string('<span "data-attr"="br">Hello!</span>')
+    assert print_html_string(root_node) == (
+        '<span &quot;data-attr&quot;="br">Hello!</span>'
+    )
+
+
+def test_parser_escapes_html_in_attributes_values():
+    root_node = parse_html_string('<span data-attr="<br>">Hello!</span>')
+    assert print_html_string(root_node) == (
+        '<span data-attr="&lt;br&gt;">Hello!</span>'
+    )
+
+
+def test_parser_handles_escaped_attribute_values():
+    root_node = parse_html_string('<span data-attr="&lt;br&gt;">Hello!</span>')
+    assert print_html_string(root_node) == (
+        '<span data-attr="&lt;br&gt;">Hello!</span>'
+    )
+
+
+def test_parser_escapes_quotes_in_attributes_values():
+    root_node = parse_html_string('<span data-attr="\'">Hello!</span>')
+    assert print_html_string(root_node) == ('<span data-attr="&#x27;">Hello!</span>')
+
+
+def test_parser_handles_bool_attributes():
+    root_node = parse_html_string("<button disabled>Hello World!</button>")
+    assert print_html_string(root_node) == "<button disabled>Hello World!</button>"

+ 16 - 16
misago/markup/tests/test_inline_bbcode.py

@@ -3,91 +3,91 @@ from ..parser import parse
 
 
 def test_bold_bbcode(request_mock, user, snapshot):
 def test_bold_bbcode(request_mock, user, snapshot):
     text = "Lorem [b]ipsum[/b]!"
     text = "Lorem [b]ipsum[/b]!"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_italics_bbcode(request_mock, user, snapshot):
 def test_italics_bbcode(request_mock, user, snapshot):
     text = "Lorem [i]ipsum[/i]!"
     text = "Lorem [i]ipsum[/i]!"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_underline_bbcode(request_mock, user, snapshot):
 def test_underline_bbcode(request_mock, user, snapshot):
     text = "Lorem [u]ipsum[/u]!"
     text = "Lorem [u]ipsum[/u]!"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_simple_inline_bbcode_is_escaped(request_mock, user, snapshot):
 def test_simple_inline_bbcode_is_escaped(request_mock, user, snapshot):
     text = 'Lorem [b]ips <script language="application/javascript"> um[/b]!'
     text = 'Lorem [b]ips <script language="application/javascript"> um[/b]!'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_inline_bbcode_can_be_mixed(request_mock, user, snapshot):
 def test_inline_bbcode_can_be_mixed(request_mock, user, snapshot):
     text = "Lorem [b][u]ipsum[/u][/b]!"
     text = "Lorem [b][u]ipsum[/u][/b]!"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_inline_bbcode_can_be_mixed_with_markdown(request_mock, user, snapshot):
 def test_inline_bbcode_can_be_mixed_with_markdown(request_mock, user, snapshot):
     text = "Lorem [b]**ipsum**[/b]!"
     text = "Lorem [b]**ipsum**[/b]!"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_image_bbcode(request_mock, user, snapshot):
 def test_image_bbcode(request_mock, user, snapshot):
     text = "Lorem [img]https://placekitten.com/g/1200/500[/img] ipsum"
     text = "Lorem [img]https://placekitten.com/g/1200/500[/img] ipsum"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_image_bbcode_is_escaped(request_mock, user, snapshot):
 def test_image_bbcode_is_escaped(request_mock, user, snapshot):
     text = 'Lorem [img]<script language="application/javascript">[/img] ipsum'
     text = 'Lorem [img]<script language="application/javascript">[/img] ipsum'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_image_bbcode_is_case_insensitive(request_mock, user, snapshot):
 def test_image_bbcode_is_case_insensitive(request_mock, user, snapshot):
     text = "Lorem [iMg]https://placekitten.com/g/1200/500[/ImG] ipsum"
     text = "Lorem [iMg]https://placekitten.com/g/1200/500[/ImG] ipsum"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_url_bbcode(request_mock, user, snapshot):
 def test_url_bbcode(request_mock, user, snapshot):
     text = "Lorem [url]https://placekitten.com/g/1200/500[/url] ipsum"
     text = "Lorem [url]https://placekitten.com/g/1200/500[/url] ipsum"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_url_bbcode_is_escaped(request_mock, user, snapshot):
 def test_url_bbcode_is_escaped(request_mock, user, snapshot):
     text = 'Lorem [url]<script language="application/javascript">[/url] ipsum'
     text = 'Lorem [url]<script language="application/javascript">[/url] ipsum'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_url_bbcode_with_link_text(request_mock, user, snapshot):
 def test_url_bbcode_with_link_text(request_mock, user, snapshot):
     text = "Lorem [url=https://placekitten.com/g/1200/500]dolor[/url] ipsum"
     text = "Lorem [url=https://placekitten.com/g/1200/500]dolor[/url] ipsum"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_url_bbcode_with_long_link_text(request_mock, user, snapshot):
 def test_url_bbcode_with_long_link_text(request_mock, user, snapshot):
     text = "Lorem [url=https://placekitten.com/g/1200/500]dolor met[/url] ipsum"
     text = "Lorem [url=https://placekitten.com/g/1200/500]dolor met[/url] ipsum"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_url_bbcode_with_quotes_and_link_text(request_mock, user, snapshot):
 def test_url_bbcode_with_quotes_and_link_text(request_mock, user, snapshot):
     text = 'Lorem [url="https://placekitten.com/g/1200/500"]dolor[/url] ipsum'
     text = 'Lorem [url="https://placekitten.com/g/1200/500"]dolor[/url] ipsum'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_url_bbcode_with_quotes_and_long_link_text(request_mock, user, snapshot):
 def test_url_bbcode_with_quotes_and_long_link_text(request_mock, user, snapshot):
     text = 'Lorem [url="https://placekitten.com/g/1200/500"]dolor met[/url] ipsum'
     text = 'Lorem [url="https://placekitten.com/g/1200/500"]dolor met[/url] ipsum'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
@@ -98,5 +98,5 @@ def test_url_bbcode_link_text_is_escaped(request_mock, user, snapshot):
         "[/url] ipsum"
         "[/url] ipsum"
     )
     )
 
 
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])

+ 22 - 22
misago/markup/tests/test_link_handling.py

@@ -3,19 +3,19 @@ from ..parser import parse
 
 
 def test_parser_converts_unmarked_links_to_hrefs(request_mock, user, snapshot):
 def test_parser_converts_unmarked_links_to_hrefs(request_mock, user, snapshot):
     text = "Lorem ipsum http://test.com"
     text = "Lorem ipsum http://test.com"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_parser_skips_links_in_inline_code_markdown(request_mock, user, snapshot):
 def test_parser_skips_links_in_inline_code_markdown(request_mock, user, snapshot):
     text = "Lorem ipsum `http://test.com`"
     text = "Lorem ipsum `http://test.com`"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_parser_skips_links_in_inline_code_bbcode(request_mock, user, snapshot):
 def test_parser_skips_links_in_inline_code_bbcode(request_mock, user, snapshot):
     text = "Lorem ipsum [code]http://test.com[/code]"
     text = "Lorem ipsum [code]http://test.com[/code]"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
@@ -25,7 +25,7 @@ def test_parser_skips_links_in_code_bbcode(request_mock, user, snapshot):
 http://test.com
 http://test.com
 [/code]
 [/code]
     """
     """
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
@@ -33,13 +33,13 @@ def test_absolute_link_to_site_is_changed_to_relative_link(
     request_mock, user, snapshot
     request_mock, user, snapshot
 ):
 ):
     text = "clean_links step cleans http://example.com"
     text = "clean_links step cleans http://example.com"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_absolute_link_to_site_is_added_to_internal_links_list(request_mock, user):
 def test_absolute_link_to_site_is_added_to_internal_links_list(request_mock, user):
     text = "clean_links step cleans http://example.com"
     text = "clean_links step cleans http://example.com"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert result["internal_links"] == ["/"]
     assert result["internal_links"] == ["/"]
 
 
 
 
@@ -47,7 +47,7 @@ def test_absolute_link_to_site_without_schema_is_changed_to_relative_link(
     request_mock, user, snapshot
     request_mock, user, snapshot
 ):
 ):
     text = "clean_links step cleans example.com"
     text = "clean_links step cleans example.com"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
@@ -55,7 +55,7 @@ def test_absolute_link_to_site_without_schema_is_added_to_internal_links_list(
     request_mock, user
     request_mock, user
 ):
 ):
     text = "clean_links step cleans example.com"
     text = "clean_links step cleans example.com"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert result["internal_links"] == ["/"]
     assert result["internal_links"] == ["/"]
 
 
 
 
@@ -63,7 +63,7 @@ def test_absolute_link_with_path_to_site_is_changed_to_relative_link(
     request_mock, user, snapshot
     request_mock, user, snapshot
 ):
 ):
     text = "clean_links step cleans http://example.com/somewhere-something/"
     text = "clean_links step cleans http://example.com/somewhere-something/"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
@@ -71,25 +71,25 @@ def test_absolute_link_with_path_to_site_is_added_to_internal_links_list(
     request_mock, user
     request_mock, user
 ):
 ):
     text = "clean_links step cleans http://example.com/somewhere-something/"
     text = "clean_links step cleans http://example.com/somewhere-something/"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert result["internal_links"] == ["/somewhere-something/"]
     assert result["internal_links"] == ["/somewhere-something/"]
 
 
 
 
 def test_full_link_with_path_text_is_set_to_domain_and_path(request_mock, user):
 def test_full_link_with_path_text_is_set_to_domain_and_path(request_mock, user):
     text = "clean_links step cleans http://example.com/somewhere-something/"
     text = "clean_links step cleans http://example.com/somewhere-something/"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert ">example.com/somewhere-something/<" in result["parsed_text"]
     assert ">example.com/somewhere-something/<" in result["parsed_text"]
 
 
 
 
 def test_outgoing_link_is_added_to_outgoing_links_list(request_mock, user):
 def test_outgoing_link_is_added_to_outgoing_links_list(request_mock, user):
     text = "clean_links step cleans https://other.com"
     text = "clean_links step cleans https://other.com"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert result["outgoing_links"] == ["other.com"]
     assert result["outgoing_links"] == ["other.com"]
 
 
 
 
 def test_outgoing_llink_includes_external_nofollow_and_noopener(request_mock, user):
 def test_outgoing_llink_includes_external_nofollow_and_noopener(request_mock, user):
     text = "Lorem [url]https://placekitten.com/g/1200/500[/url] ipsum"
     text = "Lorem [url]https://placekitten.com/g/1200/500[/url] ipsum"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert 'rel="external nofollow noopener"' in result["parsed_text"]
     assert 'rel="external nofollow noopener"' in result["parsed_text"]
 
 
 
 
@@ -97,44 +97,44 @@ def test_outgoing_link_without_scheme_is_added_to_outgoing_links_list(
     request_mock, user
     request_mock, user
 ):
 ):
     text = "clean_links step cleans other.com"
     text = "clean_links step cleans other.com"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert result["outgoing_links"] == ["other.com"]
     assert result["outgoing_links"] == ["other.com"]
 
 
 
 
 def test_outgoing_link_with_path_is_added_to_outgoing_links_list(request_mock, user):
 def test_outgoing_link_with_path_is_added_to_outgoing_links_list(request_mock, user):
     text = "clean_links step cleans other.com/some/path/"
     text = "clean_links step cleans other.com/some/path/"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert result["outgoing_links"] == ["other.com/some/path/"]
     assert result["outgoing_links"] == ["other.com/some/path/"]
 
 
 
 
 def test_local_image_is_changed_to_relative_link(request_mock, user, snapshot):
 def test_local_image_is_changed_to_relative_link(request_mock, user, snapshot):
     text = "clean_links step cleans !(example.com/media/img.png)"
     text = "clean_links step cleans !(example.com/media/img.png)"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_local_image_is_added_to_images_list(request_mock, user):
 def test_local_image_is_added_to_images_list(request_mock, user):
     text = "clean_links step cleans !(example.com/media/img.png)"
     text = "clean_links step cleans !(example.com/media/img.png)"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert result["images"] == ["/media/img.png"]
     assert result["images"] == ["/media/img.png"]
 
 
 
 
 def test_remote_image_is_added_to_images_list(request_mock, user):
 def test_remote_image_is_added_to_images_list(request_mock, user):
     text = "clean_links step cleans !(other.com/media/img.png)"
     text = "clean_links step cleans !(other.com/media/img.png)"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert result["images"] == ["other.com/media/img.png"]
     assert result["images"] == ["other.com/media/img.png"]
 
 
 
 
 def test_local_image_link_is_added_to_images_and_links_lists(request_mock, user):
 def test_local_image_link_is_added_to_images_and_links_lists(request_mock, user):
     text = "clean_links step cleans [!(example.com/media/img.png)](example.com/test/)"
     text = "clean_links step cleans [!(example.com/media/img.png)](example.com/test/)"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert result["internal_links"] == ["/test/"]
     assert result["internal_links"] == ["/test/"]
     assert result["images"] == ["/media/img.png"]
     assert result["images"] == ["/media/img.png"]
 
 
 
 
 def test_remote_image_link_is_added_to_images_and_links_lists(request_mock, user):
 def test_remote_image_link_is_added_to_images_and_links_lists(request_mock, user):
     text = "clean_links step cleans [!(other.com/media/img.png)](other.com/test/)"
     text = "clean_links step cleans [!(other.com/media/img.png)](other.com/test/)"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert result["outgoing_links"] == ["other.com/test/"]
     assert result["outgoing_links"] == ["other.com/test/"]
     assert result["images"] == ["other.com/media/img.png"]
     assert result["images"] == ["other.com/media/img.png"]
 
 
@@ -143,7 +143,7 @@ def test_parser_adds_shva_to_attachment_link_querystring_if_force_option_is_enab
     request_mock, user
     request_mock, user
 ):
 ):
     text = "clean_links step cleans ![3.png](http://example.com/a/thumb/test/43/)"
     text = "clean_links step cleans ![3.png](http://example.com/a/thumb/test/43/)"
-    result = parse(text, request_mock, user, minify=False, force_shva=True)
+    result = parse(text, request_mock, user, force_shva=True)
     assert "/a/thumb/test/43/?shva=1" in result["parsed_text"]
     assert "/a/thumb/test/43/?shva=1" in result["parsed_text"]
 
 
 
 
@@ -151,5 +151,5 @@ def test_parser_skips_shva_in_attachment_link_querystring_if_force_option_is_omi
     request_mock, user
     request_mock, user
 ):
 ):
     text = "clean_links step cleans ![3.png](http://example.com/a/thumb/test/43/)"
     text = "clean_links step cleans ![3.png](http://example.com/a/thumb/test/43/)"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert "?shva=1" not in result["parsed_text"]
     assert "?shva=1" not in result["parsed_text"]

+ 52 - 21
misago/markup/tests/test_mentions.py

@@ -1,36 +1,51 @@
+from ..htmlparser import parse_html_string, print_html_string
 from ..mentions import add_mentions
 from ..mentions import add_mentions
 
 
 
 
-def test_util_replaces_mention_with_link_to_user_profile_in_parsed_text(
-    request_mock, user
-):
+def test_util_replaces_mention_with_link_to_user_profile_in_parsed_text(user):
     parsing_result = {"parsed_text": f"<p>Hello, @{user.username}!</p>", "mentions": []}
     parsing_result = {"parsed_text": f"<p>Hello, @{user.username}!</p>", "mentions": []}
-    add_mentions(request_mock, parsing_result)
+    root_node = parse_html_string(parsing_result["parsed_text"])
+
+    add_mentions(parsing_result, root_node)
+
+    parsing_result["parsed_text"] = print_html_string(root_node)
     assert parsing_result["parsed_text"] == (
     assert parsing_result["parsed_text"] == (
         f'<p>Hello, <a href="{user.get_absolute_url()}">@{user.username}</a>!</p>'
         f'<p>Hello, <a href="{user.get_absolute_url()}">@{user.username}</a>!</p>'
     )
     )
 
 
 
 
-def test_util_adds_mention_to_parsig_result(request_mock, user):
+def test_util_adds_mention_to_parsig_result(user):
     parsing_result = {"parsed_text": f"<p>Hello, @{user.username}!</p>", "mentions": []}
     parsing_result = {"parsed_text": f"<p>Hello, @{user.username}!</p>", "mentions": []}
-    add_mentions(request_mock, parsing_result)
-    assert parsing_result["mentions"] == [user]
+    root_node = parse_html_string(parsing_result["parsed_text"])
+
+    add_mentions(parsing_result, root_node)
+
+    parsing_result["parsed_text"] = print_html_string(root_node)
+    assert parsing_result["mentions"] == [user.id]
 
 
 
 
-def test_mentions_arent_added_for_nonexisting_user(request_mock, user):
+def test_mentions_arent_added_for_nonexisting_user(user):
     parsing_result = {"parsed_text": f"<p>Hello, @OtherUser!</p>", "mentions": []}
     parsing_result = {"parsed_text": f"<p>Hello, @OtherUser!</p>", "mentions": []}
-    add_mentions(request_mock, parsing_result)
+    root_node = parse_html_string(parsing_result["parsed_text"])
+
+    add_mentions(parsing_result, root_node)
+
+    parsing_result["parsed_text"] = print_html_string(root_node)
     assert parsing_result["parsed_text"] == "<p>Hello, @OtherUser!</p>"
     assert parsing_result["parsed_text"] == "<p>Hello, @OtherUser!</p>"
 
 
 
 
 def test_util_replaces_multiple_mentions_with_link_to_user_profiles_in_parsed_text(
 def test_util_replaces_multiple_mentions_with_link_to_user_profiles_in_parsed_text(
-    request_mock, user, other_user
+    user, other_user
 ):
 ):
     parsing_result = {
     parsing_result = {
         "parsed_text": f"<p>Hello, @{user.username} and @{other_user.username}!</p>",
         "parsed_text": f"<p>Hello, @{user.username} and @{other_user.username}!</p>",
         "mentions": [],
         "mentions": [],
     }
     }
-    add_mentions(request_mock, parsing_result)
+    root_node = parse_html_string(parsing_result["parsed_text"])
+
+    add_mentions(parsing_result, root_node)
+
+    parsing_result["parsed_text"] = print_html_string(root_node)
     assert (
     assert (
         f'<a href="{user.get_absolute_url()}">@{user.username}</a>'
         f'<a href="{user.get_absolute_url()}">@{user.username}</a>'
         in parsing_result["parsed_text"]
         in parsing_result["parsed_text"]
@@ -41,38 +56,54 @@ def test_util_replaces_multiple_mentions_with_link_to_user_profiles_in_parsed_te
     )
     )
 
 
 
 
-def test_util_adds_multiple_mentions_to_parsig_result(request_mock, user, other_user):
+def test_util_adds_multiple_mentions_to_parsig_result(user, other_user):
     parsing_result = {
     parsing_result = {
         "parsed_text": f"<p>Hello, @{user.username} and @{other_user.username}!</p>",
         "parsed_text": f"<p>Hello, @{user.username} and @{other_user.username}!</p>",
         "mentions": [],
         "mentions": [],
     }
     }
-    add_mentions(request_mock, parsing_result)
-    assert parsing_result["mentions"] == [user, other_user]
+    root_node = parse_html_string(parsing_result["parsed_text"])
 
 
+    add_mentions(parsing_result, root_node)
 
 
-def test_util_handles_repeated_mentions_of_same_user(request_mock, user):
+    parsing_result["parsed_text"] = print_html_string(root_node)
+    assert parsing_result["mentions"] == [user.id, other_user.id]
+
+
+def test_util_handles_repeated_mentions_of_same_user(user):
     parsing_result = {
     parsing_result = {
         "parsed_text": f"<p>Hello, @{user.username} and @{user.username}!</p>",
         "parsed_text": f"<p>Hello, @{user.username} and @{user.username}!</p>",
         "mentions": [],
         "mentions": [],
     }
     }
-    add_mentions(request_mock, parsing_result)
-    assert parsing_result["mentions"] == [user]
+    root_node = parse_html_string(parsing_result["parsed_text"])
+
+    add_mentions(parsing_result, root_node)
+
+    parsing_result["parsed_text"] = print_html_string(root_node)
+    assert parsing_result["mentions"] == [user.id]
 
 
 
 
-def test_util_skips_mentions_in_links(request_mock, user, snapshot):
+def test_util_skips_mentions_in_links(user, snapshot):
     parsing_result = {
     parsing_result = {
         "parsed_text": f'<p>Hello, <a href="/">@{user.username}</a></p>',
         "parsed_text": f'<p>Hello, <a href="/">@{user.username}</a></p>',
         "mentions": [],
         "mentions": [],
     }
     }
-    add_mentions(request_mock, parsing_result)
+    root_node = parse_html_string(parsing_result["parsed_text"])
+
+    add_mentions(parsing_result, root_node)
+
+    parsing_result["parsed_text"] = print_html_string(root_node)
     assert parsing_result["parsed_text"] == (
     assert parsing_result["parsed_text"] == (
         f'<p>Hello, <a href="/">@{user.username}</a></p>'
         f'<p>Hello, <a href="/">@{user.username}</a></p>'
     )
     )
     assert parsing_result["mentions"] == []
     assert parsing_result["mentions"] == []
 
 
 
 
-def test_util_handles_text_without_mentions(request_mock):
+def test_util_handles_text_without_mentions(db):
     parsing_result = {"parsed_text": f"<p>Hello, world!</p>", "mentions": []}
     parsing_result = {"parsed_text": f"<p>Hello, world!</p>", "mentions": []}
-    add_mentions(request_mock, parsing_result)
+    root_node = parse_html_string(parsing_result["parsed_text"])
+
+    add_mentions(parsing_result, root_node)
+
+    parsing_result["parsed_text"] = print_html_string(root_node)
     assert parsing_result["parsed_text"] == ("<p>Hello, world!</p>")
     assert parsing_result["parsed_text"] == ("<p>Hello, world!</p>")
     assert parsing_result["mentions"] == []
     assert parsing_result["mentions"] == []

+ 0 - 17
misago/markup/tests/test_parser.py

@@ -1,17 +0,0 @@
-from ..parser import parse
-
-
-def test_html_is_escaped(request_mock, user, snapshot):
-    text = "Lorem <strong>ipsum!</strong>"
-    result = parse(text, request_mock, user, minify=True)
-    snapshot.assert_match(result["parsed_text"])
-
-
-def test_parsed_text_is_minified(request_mock, user, snapshot):
-    text = """
-Lorem **ipsum** dolor met.
-
-Sit amet elit.
-"""
-    result = parse(text, request_mock, user, minify=True)
-    snapshot.assert_match(result["parsed_text"])

+ 7 - 7
misago/markup/tests/test_quote_bbcode.py

@@ -3,25 +3,25 @@ from ..parser import parse
 
 
 def test_single_line_quote(request_mock, user, snapshot):
 def test_single_line_quote(request_mock, user, snapshot):
     text = "[quote]Sit amet elit.[/quote]"
     text = "[quote]Sit amet elit.[/quote]"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_single_line_authored_quote(request_mock, user, snapshot):
 def test_single_line_authored_quote(request_mock, user, snapshot):
     text = '[quote="@Bob"]Sit amet elit.[/quote]'
     text = '[quote="@Bob"]Sit amet elit.[/quote]'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_single_line_authored_quote_without_quotations(request_mock, user, snapshot):
 def test_single_line_authored_quote_without_quotations(request_mock, user, snapshot):
     text = "[quote=@Bob]Sit amet elit.[/quote]"
     text = "[quote=@Bob]Sit amet elit.[/quote]"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_quote_can_contain_bbcode_or_markdown(request_mock, user, snapshot):
 def test_quote_can_contain_bbcode_or_markdown(request_mock, user, snapshot):
     text = "[quote]Sit **amet** [u]elit[/u].[/quote]"
     text = "[quote]Sit **amet** [u]elit[/u].[/quote]"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
@@ -33,7 +33,7 @@ Sit amet elit.
 Another line.
 Another line.
 [/quote]
 [/quote]
 """
 """
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
@@ -44,7 +44,7 @@ Sit amet elit.
 [quote]Nested quote[/quote]
 [quote]Nested quote[/quote]
 [/quote]
 [/quote]
 """
 """
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
@@ -57,5 +57,5 @@ Sit amet elit.
 Another line.
 Another line.
 [/quote]
 [/quote]
 """
 """
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])

+ 1 - 1
misago/markup/tests/test_short_image_markdown.py

@@ -19,5 +19,5 @@ from ..parser import parse
     ],
     ],
 )
 )
 def test_short_image_markdown(request_mock, user, snapshot, text):
 def test_short_image_markdown(request_mock, user, snapshot, text):
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])

+ 5 - 5
misago/markup/tests/test_spoiler_bbcode.py

@@ -3,13 +3,13 @@ from ..parser import parse
 
 
 def test_single_line_spoiler(request_mock, user, snapshot):
 def test_single_line_spoiler(request_mock, user, snapshot):
     text = "[spoiler]Daenerys and Jon live happily ever after![/spoiler]"
     text = "[spoiler]Daenerys and Jon live happily ever after![/spoiler]"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
 def test_spoiler_can_contain_bbcode_or_markdown(request_mock, user, snapshot):
 def test_spoiler_can_contain_bbcode_or_markdown(request_mock, user, snapshot):
     text = "[spoiler]Sit **amet** [u]elit[/u].[/spoiler]"
     text = "[spoiler]Sit **amet** [u]elit[/u].[/spoiler]"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
@@ -21,7 +21,7 @@ Sit amet elit.
 Another line.
 Another line.
 [/spoiler]
 [/spoiler]
 """
 """
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
@@ -32,7 +32,7 @@ Sit amet elit.
 [spoiler]Nested spoiler[/spoiler]
 [spoiler]Nested spoiler[/spoiler]
 [/spoiler]
 [/spoiler]
 """
 """
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])
 
 
 
 
@@ -45,5 +45,5 @@ Sit amet elit.
 Another line.
 Another line.
 [/spoiler]
 [/spoiler]
 """
 """
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])

+ 1 - 1
misago/markup/tests/test_strikethrough_markdown.py

@@ -3,5 +3,5 @@ from ..parser import parse
 
 
 def test_strikethrough_markdown(request_mock, user, snapshot):
 def test_strikethrough_markdown(request_mock, user, snapshot):
     text = "Lorem ~~ipsum~~ dolor met!"
     text = "Lorem ~~ipsum~~ dolor met!"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
     snapshot.assert_match(result["parsed_text"])

+ 3 - 3
misago/threads/api/postingendpoint/mentions.py

@@ -8,9 +8,9 @@ class MentionsMiddleware(PostingMiddleware):
             existing_mentions = self.get_existing_mentions()
             existing_mentions = self.get_existing_mentions()
 
 
         new_mentions = []
         new_mentions = []
-        for user in self.post.parsing_result["mentions"]:
-            if user.pk not in existing_mentions:
-                new_mentions.append(user)
+        for user_pk in self.post.parsing_result["mentions"]:
+            if user_pk not in existing_mentions:
+                new_mentions.append(user_pk)
 
 
         if new_mentions:
         if new_mentions:
             self.post.mentions.add(*new_mentions)
             self.post.mentions.add(*new_mentions)

+ 2 - 3
misago/threads/tests/test_post_mentions.py

@@ -55,7 +55,7 @@ class PostMentionsTests(AuthenticatedUserTestCase):
         self.assertEqual(post.mentions.all()[0], self.user)
         self.assertEqual(post.mentions.all()[0], self.user)
 
 
     def test_mention_limit(self):
     def test_mention_limit(self):
-        """endpoint mentions limits mentions to 24 users"""
+        """endpoint mentions over limit results in no mentions set"""
         users = []
         users = []
 
 
         for i in range(MENTIONS_LIMIT + 5):
         for i in range(MENTIONS_LIMIT + 5):
@@ -70,8 +70,7 @@ class PostMentionsTests(AuthenticatedUserTestCase):
 
 
         post = self.user.post_set.order_by("id").last()
         post = self.user.post_set.order_by("id").last()
 
 
-        self.assertEqual(post.mentions.count(), 24)
-        self.assertEqual(list(post.mentions.order_by("id")), users[:24])
+        self.assertEqual(post.mentions.count(), 0)
 
 
     def test_mention_update(self):
     def test_mention_update(self):
         """edit post endpoint updates mentions"""
         """edit post endpoint updates mentions"""

+ 0 - 3
requirements.in

@@ -1,13 +1,10 @@
 ariadne
 ariadne
 ariadne_django
 ariadne_django
-beautifulsoup4<4.8
-bleach
 celery[redis]
 celery[redis]
 coveralls
 coveralls
 django<4
 django<4
 djangorestframework
 djangorestframework
 django-debug-toolbar
 django-debug-toolbar
-django-htmlmin
 django-mptt
 django-mptt
 django-simple-sso
 django-simple-sso
 Faker
 Faker

+ 2 - 17
requirements.txt

@@ -20,14 +20,8 @@ async-timeout==4.0.2
     # via redis
     # via redis
 attrs==22.1.0
 attrs==22.1.0
     # via pytest
     # via pytest
-beautifulsoup4==4.7.1
-    # via
-    #   -r requirements.in
-    #   django-htmlmin
 billiard==3.6.4.0
 billiard==3.6.4.0
     # via celery
     # via celery
-bleach==5.0.1
-    # via -r requirements.in
 celery[redis]==5.2.7
 celery[redis]==5.2.7
     # via -r requirements.in
     # via -r requirements.in
 certifi==2022.6.15
 certifi==2022.6.15
@@ -73,8 +67,6 @@ django==3.2.15
     #   webservices
     #   webservices
 django-debug-toolbar==3.5.0
 django-debug-toolbar==3.5.0
     # via -r requirements.in
     # via -r requirements.in
-django-htmlmin==0.11.0
-    # via -r requirements.in
 django-js-asset==2.0.0
 django-js-asset==2.0.0
     # via django-mptt
     # via django-mptt
 django-mptt==0.13.4
 django-mptt==0.13.4
@@ -92,9 +84,7 @@ fastdiff==0.3.0
 graphql-core==3.2.1
 graphql-core==3.2.1
     # via ariadne
     # via ariadne
 html5lib==1.1
 html5lib==1.1
-    # via
-    #   -r requirements.in
-    #   django-htmlmin
+    # via -r requirements.in
 idna==3.3
 idna==3.3
     # via
     # via
     #   anyio
     #   anyio
@@ -173,7 +163,6 @@ responses==0.21.0
     # via -r requirements.in
     # via -r requirements.in
 six==1.16.0
 six==1.16.0
     # via
     # via
-    #   bleach
     #   click-repl
     #   click-repl
     #   html5lib
     #   html5lib
     #   python-dateutil
     #   python-dateutil
@@ -186,8 +175,6 @@ social-auth-app-django==5.0.0
     # via -r requirements.in
     # via -r requirements.in
 social-auth-core==4.3.0
 social-auth-core==4.3.0
     # via social-auth-app-django
     # via social-auth-app-django
-soupsieve==2.3.2.post1
-    # via beautifulsoup4
 sqlparse==0.4.2
 sqlparse==0.4.2
     # via
     # via
     #   django
     #   django
@@ -220,9 +207,7 @@ wasmer-compiler-cranelift==1.1.0
 wcwidth==0.2.5
 wcwidth==0.2.5
     # via prompt-toolkit
     # via prompt-toolkit
 webencodings==0.5.1
 webencodings==0.5.1
-    # via
-    #   bleach
-    #   html5lib
+    # via html5lib
 webservices[django]==0.7
 webservices[django]==0.7
     # via django-simple-sso
     # via django-simple-sso
 wrapt==1.14.1
 wrapt==1.14.1