Просмотр исходного кода

Remove beautiful soup dependency (#1453)

* WIP replace beautiful soup with custom HTML util

* Format code with black

* Remove unused imports

* Remove beautifulsoup, bleach and htmlmin

* Bump version to 0.29

* Remove BS4 import

* Fix function call

* Tweak HTML parser

* Tweak parser
Rafał Pitoń 2 года назад
Родитель
Коммит
161da7d69d
33 измененных файла: 711 добавлений и 367 удалений
  1. 1 1
      misago/__init__.py
  2. 0 5
      misago/conf/defaults.py
  3. 110 0
      misago/markup/htmlparser.py
  4. 199 0
      misago/markup/links.py
  5. 104 41
      misago/markup/mentions.py
  6. 16 130
      misago/markup/parser.py
  7. 11 7
      misago/markup/pipeline.py
  8. 6 4
      misago/markup/tests/snapshots/snap_test_code_bbcode.py
  9. 3 3
      misago/markup/tests/snapshots/snap_test_code_md.py
  10. 15 4
      misago/markup/tests/snapshots/snap_test_escaping.py
  11. 1 1
      misago/markup/tests/snapshots/snap_test_hr_bbcode.py
  12. 6 6
      misago/markup/tests/snapshots/snap_test_inline_bbcode.py
  13. 2 2
      misago/markup/tests/snapshots/snap_test_link_handling.py
  14. 22 22
      misago/markup/tests/snapshots/snap_test_quote_bbcode.py
  15. 7 3
      misago/markup/tests/snapshots/snap_test_short_image_markdown.py
  16. 8 8
      misago/markup/tests/snapshots/snap_test_spoiler_bbcode.py
  17. 5 5
      misago/markup/tests/test_code_bbcode.py
  18. 3 3
      misago/markup/tests/test_code_md.py
  19. 10 4
      misago/markup/tests/test_escaping.py
  20. 2 2
      misago/markup/tests/test_hr_bbcode.py
  21. 69 0
      misago/markup/tests/test_htmlparser.py
  22. 16 16
      misago/markup/tests/test_inline_bbcode.py
  23. 22 22
      misago/markup/tests/test_link_handling.py
  24. 52 21
      misago/markup/tests/test_mentions.py
  25. 0 17
      misago/markup/tests/test_parser.py
  26. 7 7
      misago/markup/tests/test_quote_bbcode.py
  27. 1 1
      misago/markup/tests/test_short_image_markdown.py
  28. 5 5
      misago/markup/tests/test_spoiler_bbcode.py
  29. 1 1
      misago/markup/tests/test_strikethrough_markdown.py
  30. 3 3
      misago/threads/api/postingendpoint/mentions.py
  31. 2 3
      misago/threads/tests/test_post_mentions.py
  32. 0 3
      requirements.in
  33. 2 17
      requirements.txt

+ 1 - 1
misago/__init__.py

@@ -1,5 +1,5 @@
 from .plugins.pluginlist import load_plugin_list_if_exists
 
 
-__version__ = "0.28.2"
+__version__ = "0.29.0"
 __released__ = True

+ 0 - 5
misago/conf/defaults.py

@@ -36,11 +36,6 @@ MISAGO_USER_DATA_DOWNLOADS_WORKING_DIR = None
 MISAGO_MARKUP_EXTENSIONS = []
 
 
-# Bleach callbacks for linkifying paragraphs
-
-MISAGO_BLEACH_CALLBACKS = []
-
-
 # Custom post validators
 
 MISAGO_POST_VALIDATORS = []

+ 110 - 0
misago/markup/htmlparser.py

@@ -0,0 +1,110 @@
+import html
+from dataclasses import dataclass
+
+import html5lib
+
+SINGLETON_TAGS = (
+    "area",
+    "base",
+    "br",
+    "col",
+    "command",
+    "embed",
+    "hr",
+    "img",
+    "input",
+    "keygen",
+    "link",
+    "meta",
+    "param",
+    "source",
+    "track",
+    "wbr",
+)
+
+
+class Node:
+    def __str__(self):
+        raise NotImplementedError("Subclasses of 'Node' need to implement __str__")
+
+
+@dataclass
+class RootNode(Node):
+    tag = None
+    children: list
+
+    def __str__(self):
+        return "".join(str(child) for child in self.children)
+
+
+@dataclass
+class ElementNode(Node):
+    tag: str
+    attrs: dict
+    children: list
+
+    def __str__(self):
+        attrs_padding = " " if self.attrs else ""
+        attrs = " ".join(self.attrs_str())
+
+        if self.tag in SINGLETON_TAGS:
+            return f"<{self.tag}{attrs_padding}{attrs} />"
+
+        children = "".join(str(child) for child in self.children)
+        return f"<{self.tag}{attrs_padding}{attrs}>{children}</{self.tag}>"
+
+    def attrs_str(self):
+        for name, value in self.attrs.items():
+            if value is True or not value:
+                yield html.escape(str(name))
+            else:
+                yield (f'{html.escape(str(name))}="{html.escape(str(value))}"')
+
+
+@dataclass
+class TextNode(Node):
+    text: str
+
+    def __str__(self):
+        return html.escape(self.text)
+
+
+def parse_html_string(string: str) -> RootNode:
+    element = html5lib.parse(
+        string,
+        namespaceHTMLElements=False,
+    )
+
+    body = element.find("body")
+    root_node = RootNode(children=[])
+
+    if body.text:
+        root_node.children.append(TextNode(text=body.text))
+
+    for child in body:
+        add_child_node(root_node, child)
+
+    return root_node
+
+
+def add_child_node(parent, element):
+    node = ElementNode(
+        tag=element.tag,
+        attrs=element.attrib,
+        children=[],
+    )
+
+    if element.text:
+        node.children.append(TextNode(text=element.text))
+
+    parent.children.append(node)
+
+    if element.tail:
+        parent.children.append(TextNode(text=element.tail))
+
+    for child in element:
+        add_child_node(node, child)
+
+
+def print_html_string(root_node: RootNode) -> str:
+    return str(root_node)

+ 199 - 0
misago/markup/links.py

@@ -0,0 +1,199 @@
+import re
+from typing import Union
+
+from django.http import Http404
+from django.urls import resolve
+
+from .htmlparser import ElementNode, RootNode, TextNode
+
+MISAGO_ATTACHMENT_VIEWS = ("misago:attachment", "misago:attachment-thumbnail")
+URL_RE = re.compile(
+    r"(https?://)?"
+    r"(www\.)?"
+    r"(\w+((-|_)\w+)?\.)?"
+    r"\w+((_|-|\w)+)?(\.[a-z][a-z]+)"
+    r"(:[1-9][0-9]+)?"
+    r"([^\s<>\[\]\(\);:]+)?"
+)
+
+
+def linkify_texts(node: Union[RootNode, ElementNode]):
+    # Skip link replacement in some nodes
+    if node.tag in ("pre", "code", "a"):
+        return
+
+    new_children = []
+    for child in node.children:
+        if isinstance(child, TextNode):
+            if URL_RE.search(child.text):
+                new_children += replace_links_in_text(child.text)
+            else:
+                new_children.append(child)
+        else:
+            new_children.append(child)
+            linkify_texts(child)
+
+    node.children = new_children
+
+
+def replace_links_in_text(text: str) -> list:
+    nodes = []
+
+    while True:
+        match = URL_RE.search(text)
+        if not match:
+            if text:
+                nodes.append(TextNode(text=text))
+            return nodes
+
+        start, end = match.span()
+        url = text[start:end]
+
+        # Append text between 0 and start to nodes
+        if start > 0:
+            nodes.append(TextNode(text=text[:start]))
+
+        nodes.append(
+            ElementNode(
+                tag="a",
+                attrs={"href": url},
+                children=[
+                    TextNode(text=strip_link_protocol(url)),
+                ],
+            )
+        )
+
+        text = text[end:]
+
+
+def clean_links(
+    request,
+    result,
+    node: Union[RootNode, ElementNode, TextNode],
+    force_shva=False,
+):
+    if isinstance(node, TextNode):
+        return
+
+    for child in node.children:
+        if not isinstance(child, ElementNode):
+            continue
+
+        if child.tag == "a":
+            clean_link_node(request, result, child, force_shva)
+            clean_links(request, result, child, force_shva)
+        elif child.tag == "img":
+            clean_image_node(request, result, child, force_shva)
+        else:
+            clean_links(request, result, child, force_shva)
+
+
+def clean_link_node(
+    request,
+    result: dict,
+    node: ElementNode,
+    force_shva: bool,
+):
+    host = request.get_host()
+    href = node.attrs.get("href") or "/"
+
+    if is_internal_link(href, host):
+        href = clean_internal_link(href, host)
+        result["internal_links"].append(href)
+        href = clean_attachment_link(href, force_shva)
+    else:
+        result["outgoing_links"].append(strip_link_protocol(href))
+        href = assert_link_prefix(href)
+        node.attrs["rel"] = "external nofollow noopener"
+
+    node.attrs["target"] = "_blank"
+    node.attrs["href"] = href
+
+    if len(node.children) == 0:
+        node.children.append(strip_link_protocol(href))
+    elif len(node.children) == 1 and isinstance(node.children[0], TextNode):
+        text = node.children[0].text
+        if URL_RE.match(text):
+            node.children[0].text = strip_link_protocol(text)
+
+
+def clean_image_node(
+    request,
+    result: dict,
+    node: ElementNode,
+    force_shva: bool,
+):
+    host = request.get_host()
+    src = node.attrs.get("src") or "/"
+
+    node.attrs["alt"] = strip_link_protocol(node.attrs["alt"])
+
+    if is_internal_link(src, host):
+        src = clean_internal_link(src, host)
+        result["images"].append(src)
+        src = clean_attachment_link(src, force_shva)
+    else:
+        result["images"].append(strip_link_protocol(src))
+        src = assert_link_prefix(src)
+
+    node.attrs["src"] = src
+
+
+def is_internal_link(link, host):
+    if link.startswith("/") and not link.startswith("//"):
+        return True
+
+    link = strip_link_protocol(link).lstrip("www.").lower()
+    return link.lower().startswith(host.lstrip("www."))
+
+
+def strip_link_protocol(link):
+    if link.lower().startswith("https:"):
+        link = link[6:]
+    if link.lower().startswith("http:"):
+        link = link[5:]
+    if link.startswith("//"):
+        link = link[2:]
+    return link
+
+
+def assert_link_prefix(link):
+    if link.lower().startswith("https:"):
+        return link
+    if link.lower().startswith("http:"):
+        return link
+    if link.startswith("//"):
+        return "http:%s" % link
+
+    return "http://%s" % link
+
+
+def clean_internal_link(link, host):
+    link = strip_link_protocol(link)
+
+    if link.lower().startswith("www."):
+        link = link[4:]
+    if host.lower().startswith("www."):
+        host = host[4:]
+
+    if link.lower().startswith(host):
+        link = link[len(host) :]
+
+    return link or "/"
+
+
+def clean_attachment_link(link, force_shva=False):
+    try:
+        resolution = resolve(link)
+        if not resolution.namespaces:
+            return link
+        url_name = ":".join(resolution.namespaces + [resolution.url_name])
+    except (Http404, ValueError):
+        return link
+
+    if url_name in MISAGO_ATTACHMENT_VIEWS:
+        if force_shva:
+            link = "%s?shva=1" % link
+        elif link.endswith("?shva=1"):
+            link = link[:-7]
+    return link

+ 104 - 41
misago/markup/mentions.py

@@ -1,65 +1,128 @@
 import re
+from typing import Union
 
-from bs4 import BeautifulSoup
 from django.contrib.auth import get_user_model
 
-SUPPORTED_TAGS = ("h1", "h2", "h3", "h4", "h5", "h6", "div", "p")
+from .htmlparser import (
+    ElementNode,
+    RootNode,
+    TextNode,
+)
+
+EXCLUDE_ELEMENTS = ("pre", "code", "a")
 USERNAME_RE = re.compile(r"@[0-9a-z]+", re.IGNORECASE)
-MENTIONS_LIMIT = 24
+MENTIONS_LIMIT = 32
 
 
-def add_mentions(request, result):
+def add_mentions(result, root_node):
     if "@" not in result["parsed_text"]:
         return
 
-    mentions_dict = {}
+    mentions = set()
+    nodes = []
+
+    find_mentions(root_node, mentions, nodes)
+
+    if not mentions or len(mentions) > MENTIONS_LIMIT:
+        return  # No need to run mentions logic
 
-    soup = BeautifulSoup(result["parsed_text"], "html5lib")
+    users_data = get_users_data(mentions)
+    if not users_data:
+        return  # Mentioned users don't exist
 
-    elements = []
-    for tagname in SUPPORTED_TAGS:
-        if tagname in result["parsed_text"]:
-            elements += soup.find_all(tagname)
-    for element in elements:
-        add_mentions_to_element(request, element, mentions_dict)
+    for node in nodes:
+        add_mentions_to_node(node, users_data)
 
-    result["parsed_text"] = str(soup.body)[6:-7].strip()
-    result["mentions"] = list(filter(bool, mentions_dict.values()))
+    result["mentions"] = [user[0] for user in users_data.values()]
 
 
-def add_mentions_to_element(request, element, mentions_dict):
-    for item in element.contents:
-        if item.name:
-            if item.name != "a":
-                add_mentions_to_element(request, item, mentions_dict)
-        elif "@" in item.string:
-            parse_string(request, item, mentions_dict)
+def find_mentions(
+    node: Union[ElementNode, RootNode],
+    mentions: set,
+    nodes: set,
+):
+    if isinstance(node, ElementNode) and node.tag in EXCLUDE_ELEMENTS:
+        return
+
+    tracked_node = False
+    for child in node.children:
+        if isinstance(child, TextNode):
+            results = find_mentions_in_str(child.text)
+            if results:
+                mentions.update(results)
+                if not tracked_node:
+                    tracked_node = True
+                    nodes.append(node)
+        else:
+            find_mentions(child, mentions, nodes)
 
 
-def parse_string(request, element, mentions_dict):
+def find_mentions_in_str(text: str):
+    matches = USERNAME_RE.findall(text)
+    if not matches:
+        return None
+
+    return set([match.lower()[1:] for match in matches])
+
+
+def get_users_data(mentions):
     User = get_user_model()
+    users_data = {}
+
+    queryset = User.objects.filter(slug__in=mentions).values_list(
+        "id", "username", "slug"
+    )
+
+    for user_id, username, slug in queryset:
+        users_data[slug] = (user_id, username)
+
+    return users_data
+
+
+def add_mentions_to_node(node, users_data):
+    new_children = []
+
+    for child in node.children:
+        if isinstance(child, TextNode):
+            new_children += add_mentions_to_text(child.text, users_data)
+        else:
+            new_children.append(child)
+
+    node.children = new_children
+
+
+def add_mentions_to_text(text: str, users_data):
+    nodes = []
 
-    def replace_mentions(matchobj):
-        if len(mentions_dict) >= MENTIONS_LIMIT:
-            return matchobj.group(0)
+    while True:
+        match = USERNAME_RE.search(text)
+        if not match:
+            if text:
+                nodes.append(TextNode(text=text))
+            return nodes
 
-        username = matchobj.group(0)[1:].strip().lower()
+        start, end = match.span()
+        user_slug = text[start + 1 : end].lower()
 
-        if username not in mentions_dict:
-            if username == request.user.slug:
-                mentions_dict[username] = request.user
-            else:
-                try:
-                    mentions_dict[username] = User.objects.get(slug=username)
-                except User.DoesNotExist:
-                    mentions_dict[username] = None
+        # Append text between 0 and start to nodes
+        if start > 0:
+            nodes.append(TextNode(text=text[:start]))
 
-        if mentions_dict[username]:
-            user = mentions_dict[username]
-            return '<a href="%s">@%s</a>' % (user.get_absolute_url(), user.username)
+        # Append match string to nodes and keep scanning
+        if user_slug not in users_data:
+            nodes.append(TextNode(text=text[:end]))
+            text = text[end:]
+            continue
 
-        # we've failed to resolve user for username
-        return matchobj.group(0)
+        user_id, username = users_data[user_slug]
+        nodes.append(
+            ElementNode(
+                tag="a",
+                attrs={
+                    "href": f"/u/{user_slug}/{user_id}/",
+                },
+                children=[TextNode(text=f"@{username}")],
+            )
+        )
 
-    replaced_string = USERNAME_RE.sub(replace_mentions, element.string)
-    element.replace_with(BeautifulSoup(replaced_string, "html.parser"))
+        text = text[end:]

+ 16 - 130
misago/markup/parser.py

@@ -1,24 +1,18 @@
-import bleach
 import markdown
-from bs4 import BeautifulSoup
-from django.http import Http404
-from django.urls import resolve
-from htmlmin.minify import html_minify
 from markdown.extensions.fenced_code import FencedCodeExtension
 
-from ..conf import settings
 from .bbcode.code import CodeBlockExtension
 from .bbcode.hr import BBCodeHRProcessor
 from .bbcode.inline import bold, image, italics, underline, url
 from .bbcode.quote import QuoteExtension
 from .bbcode.spoiler import SpoilerExtension
+from .htmlparser import parse_html_string, print_html_string
+from .links import clean_links, linkify_texts
 from .md.shortimgs import ShortImagesExtension
 from .md.strikethrough import StrikethroughExtension
 from .mentions import add_mentions
 from .pipeline import pipeline
 
-MISAGO_ATTACHMENT_VIEWS = ("misago:attachment", "misago:attachment-thumbnail")
-
 
 def parse(
     text,
@@ -29,7 +23,6 @@ def parse(
     allow_images=True,
     allow_blocks=True,
     force_shva=False,
-    minify=True,
 ):
     """
     Message parser
@@ -61,19 +54,24 @@ def parse(
     # Clean and store parsed text
     parsing_result["parsed_text"] = parsed_text.strip()
 
-    if allow_links:
-        linkify_paragraphs(parsing_result)
+    # Run additional operations
+    if allow_mentions or allow_links or allow_images:
+        root_node = parse_html_string(parsing_result["parsed_text"])
 
-    parsing_result = pipeline.process_result(parsing_result)
+        if allow_links:
+            linkify_texts(root_node)
 
-    if allow_mentions:
-        add_mentions(request, parsing_result)
+        if allow_mentions:
+            add_mentions(parsing_result, root_node)
 
-    if allow_links or allow_images:
-        clean_links(request, parsing_result, force_shva)
+        if allow_links or allow_images:
+            clean_links(request, parsing_result, root_node, force_shva)
+
+        parsing_result["parsed_text"] = print_html_string(root_node)
+
+    # Let plugins do their magic
+    parsing_result = pipeline.process_result(parsing_result)
 
-    if minify:
-        minify_result(parsing_result)
     return parsing_result
 
 
@@ -144,115 +142,3 @@ def md_factory(allow_links=True, allow_images=True, allow_blocks=True):
         md.parser.blockprocessors.deregister("ulist")
 
     return pipeline.extend_markdown(md)
-
-
-def linkify_paragraphs(result):
-    result["parsed_text"] = bleach.linkify(
-        result["parsed_text"],
-        callbacks=settings.MISAGO_BLEACH_CALLBACKS,
-        skip_tags=["a", "code", "pre"],
-        parse_email=True,
-    )
-
-
-def clean_links(request, result, force_shva=False):
-    host = request.get_host()
-
-    soup = BeautifulSoup(result["parsed_text"], "html5lib")
-    for link in soup.find_all("a"):
-        if is_internal_link(link["href"], host):
-            link["href"] = clean_internal_link(link["href"], host)
-            result["internal_links"].append(link["href"])
-            link["href"] = clean_attachment_link(link["href"], force_shva)
-        else:
-            result["outgoing_links"].append(clean_link_prefix(link["href"]))
-            link["href"] = assert_link_prefix(link["href"])
-            link["rel"] = "external nofollow noopener"
-
-        link["target"] = "_blank"
-
-        if link.string:
-            link.string = clean_link_prefix(link.string)
-
-    for img in soup.find_all("img"):
-        img["alt"] = clean_link_prefix(img["alt"])
-        if is_internal_link(img["src"], host):
-            img["src"] = clean_internal_link(img["src"], host)
-            result["images"].append(img["src"])
-            img["src"] = clean_attachment_link(img["src"], force_shva)
-        else:
-            result["images"].append(clean_link_prefix(img["src"]))
-            img["src"] = assert_link_prefix(img["src"])
-
-    # [6:-7] trims <body></body> wrap
-    result["parsed_text"] = str(soup.body)[6:-7]
-
-
-def is_internal_link(link, host):
-    if link.startswith("/") and not link.startswith("//"):
-        return True
-
-    link = clean_link_prefix(link).lstrip("www.").lower()
-    return link.lower().startswith(host.lstrip("www."))
-
-
-def clean_link_prefix(link):
-    if link.lower().startswith("https:"):
-        link = link[6:]
-    if link.lower().startswith("http:"):
-        link = link[5:]
-    if link.startswith("//"):
-        link = link[2:]
-    return link
-
-
-def assert_link_prefix(link):
-    if link.lower().startswith("https:"):
-        return link
-    if link.lower().startswith("http:"):
-        return link
-    if link.startswith("//"):
-        return "http:%s" % link
-
-    return "http://%s" % link
-
-
-def clean_internal_link(link, host):
-    link = clean_link_prefix(link)
-
-    if link.lower().startswith("www."):
-        link = link[4:]
-    if host.lower().startswith("www."):
-        host = host[4:]
-
-    if link.lower().startswith(host):
-        link = link[len(host) :]
-
-    return link or "/"
-
-
-def clean_attachment_link(link, force_shva=False):
-    try:
-        resolution = resolve(link)
-        if not resolution.namespaces:
-            return link
-        url_name = ":".join(resolution.namespaces + [resolution.url_name])
-    except (Http404, ValueError):
-        return link
-
-    if url_name in MISAGO_ATTACHMENT_VIEWS:
-        if force_shva:
-            link = "%s?shva=1" % link
-        elif link.endswith("?shva=1"):
-            link = link[:-7]
-    return link
-
-
-def minify_result(result):
-    result["parsed_text"] = html_minify(result["parsed_text"])
-    result["parsed_text"] = strip_html_head_body(result["parsed_text"])
-
-
-def strip_html_head_body(parsed_text):
-    # [25:-14] trims <html><head></head><body> and </body></html>
-    return parsed_text[25:-14]

+ 11 - 7
misago/markup/pipeline.py

@@ -1,9 +1,8 @@
 from importlib import import_module
 
-from bs4 import BeautifulSoup
-
 from .. import hooks
 from ..conf import settings
+from .htmlparser import parse_html_string, print_html_string
 
 
 class MarkupPipeline:
@@ -22,18 +21,23 @@ class MarkupPipeline:
         return md
 
     def process_result(self, result):
-        soup = BeautifulSoup(result["parsed_text"], "html5lib")
+        if (
+            not settings.MISAGO_MARKUP_EXTENSIONS
+            and not hooks.parsing_result_processors
+        ):
+            return result
+
+        html_tree = parse_html_string(result["parsed_text"])
         for extension in settings.MISAGO_MARKUP_EXTENSIONS:
             module = import_module(extension)
             if hasattr(module, "clean_parsed"):
                 hook = getattr(module, "clean_parsed")
-                hook.process_result(result, soup)
+                hook.process_result(result, html_tree)
 
         for extension in hooks.parsing_result_processors:
-            extension(result, soup)
+            extension(result, html_tree)
 
-        souped_text = str(soup.body).strip()[6:-7]
-        result["parsed_text"] = souped_text.strip()
+        result["parsed_text"] = print_html_string(html_tree)
         return result
 
 

+ 6 - 4
misago/markup/tests/snapshots/snap_test_code_bbcode.py

@@ -13,16 +13,18 @@ snapshots[
 
 snapshots[
     "test_code_with_language_parameter 1"
-] = '<pre><code class="php">echo("Hello!");</code></pre>'
+] = '<pre><code class="php">echo(&quot;Hello!&quot;);</code></pre>'
 
 snapshots[
     "test_code_with_quoted_language_parameter 1"
-] = '<pre><code class="php">echo("Hello!");</code></pre>'
+] = '<pre><code class="php">echo(&quot;Hello!&quot;);</code></pre>'
 
 snapshots[
     "test_multi_line_code 1"
 ] = """<pre><code>&lt;script&gt;
-alert("!")
+alert(&quot;!&quot;)
 &lt;/script&gt;</code></pre>"""
 
-snapshots["test_single_line_code 1"] = '<pre><code>echo("Hello!");</code></pre>'
+snapshots[
+    "test_single_line_code 1"
+] = "<pre><code>echo(&quot;Hello!&quot;);</code></pre>"

+ 3 - 3
misago/markup/tests/snapshots/snap_test_code_md.py

@@ -10,17 +10,17 @@ snapshots = Snapshot()
 snapshots[
     "test_multi_line_code_markdown 1"
 ] = """<pre><code>&lt;script&gt;
-alert("!")
+alert(&quot;!&quot;)
 &lt;/script&gt;
 </code></pre>"""
 
 snapshots[
     "test_multi_line_code_markdown_with_language 1"
 ] = """<pre><code class="javascript">&lt;script&gt;
-alert("!")
+alert(&quot;!&quot;)
 &lt;/script&gt;
 </code></pre>"""
 
 snapshots[
     "test_single_line_code_markdown 1"
-] = '<p><code>&lt;script&gt;alert("!")&lt;/script&gt;</code></p>'
+] = "<p><code>&lt;script&gt;alert(&quot;!&quot;)&lt;/script&gt;</code></p>"

+ 15 - 4
misago/markup/tests/snapshots/snap_test_escaping.py

@@ -8,22 +8,33 @@ from snapshottest import Snapshot
 snapshots = Snapshot()
 
 snapshots[
+    "test_code_in_quote_bbcode_header_is_escaped 1"
+] = """<aside class="quote-block">
+<div class="quote-heading">@Us&quot;&gt;&lt;script&gt;alert(&quot;!&quot;)&lt;/script&gt;er</div>
+<blockquote class="quote-body">
+<p>Test</p>
+</blockquote>
+</aside>"""
+
+snapshots[
     "test_code_in_quote_bbcode_is_escaped 1"
 ] = """<aside class="quote-block">
 <div class="quote-heading"></div>
 <blockquote class="quote-body">
-<p>&lt;script&gt;alert("!")&lt;/script&gt;</p>
+<p>&lt;script&gt;alert(&quot;!&quot;)&lt;/script&gt;</p>
 </blockquote>
 </aside>"""
 
 snapshots[
     "test_code_in_quote_markdown_is_escaped 1"
 ] = """<blockquote>
-<p>&lt;script&gt;alert("!")&lt;/script&gt;</p>
+<p>&lt;script&gt;alert(&quot;!&quot;)&lt;/script&gt;</p>
 </blockquote>"""
 
 snapshots[
     "test_inline_code_is_escaped 1"
-] = '<p><code>&lt;script&gt;alert("!")&lt;/script&gt;</code></p>'
+] = "<p><code>&lt;script&gt;alert(&quot;!&quot;)&lt;/script&gt;</code></p>"
 
-snapshots["test_text_is_escaped 1"] = '<p>&lt;script&gt;alert("!")&lt;/script&gt;</p>'
+snapshots[
+    "test_text_is_escaped 1"
+] = "<p>&lt;script&gt;alert(&quot;!&quot;)&lt;/script&gt;</p>"

+ 1 - 1
misago/markup/tests/snapshots/snap_test_hr_bbcode.py

@@ -10,5 +10,5 @@ snapshots = Snapshot()
 snapshots[
     "test_hr_bbcode_is_replaced_if_its_alone_in_paragraph 1"
 ] = """<p>Lorem ipsum dolor met.</p>
-<hr/>
+<hr />
 <p>Sit amet elit.</p>"""

+ 6 - 6
misago/markup/tests/snapshots/snap_test_inline_bbcode.py

@@ -11,15 +11,15 @@ snapshots["test_bold_bbcode 1"] = "<p>Lorem <b>ipsum</b>!</p>"
 
 snapshots[
     "test_image_bbcode 1"
-] = '<p>Lorem <img alt="placekitten.com/g/1200/500" src="https://placekitten.com/g/1200/500"/> ipsum</p>'
+] = '<p>Lorem <img alt="placekitten.com/g/1200/500" src="https://placekitten.com/g/1200/500" /> ipsum</p>'
 
 snapshots[
     "test_image_bbcode_is_case_insensitive 1"
-] = '<p>Lorem <img alt="placekitten.com/g/1200/500" src="https://placekitten.com/g/1200/500"/> ipsum</p>'
+] = '<p>Lorem <img alt="placekitten.com/g/1200/500" src="https://placekitten.com/g/1200/500" /> ipsum</p>'
 
 snapshots[
     "test_image_bbcode_is_escaped 1"
-] = "<p>Lorem <img alt='&lt;script language=\"application/javascript\"&gt;' src='http://&lt;script language=\"application/javascript\"&gt;'/> ipsum</p>"
+] = '<p>Lorem <img alt="&lt;script language=&quot;application/javascript&quot;&gt;" src="http://&lt;script language=&quot;application/javascript&quot;&gt;" /> ipsum</p>'
 
 snapshots["test_inline_bbcode_can_be_mixed 1"] = "<p>Lorem <b><u>ipsum</u></b>!</p>"
 
@@ -31,7 +31,7 @@ snapshots["test_italics_bbcode 1"] = "<p>Lorem <i>ipsum</i>!</p>"
 
 snapshots[
     "test_simple_inline_bbcode_is_escaped 1"
-] = '<p>Lorem <b>ips &lt;script language="application/javascript"&gt; um</b>!</p>'
+] = "<p>Lorem <b>ips &lt;script language=&quot;application/javascript&quot;&gt; um</b>!</p>"
 
 snapshots["test_underline_bbcode 1"] = "<p>Lorem <u>ipsum</u>!</p>"
 
@@ -41,11 +41,11 @@ snapshots[
 
 snapshots[
     "test_url_bbcode_is_escaped 1"
-] = '<p>Lorem <a href=\'http://&lt;script language="application/javascript"&gt;\' rel="external nofollow noopener" target="_blank">&lt;script language="application/javascript"&gt;</a> ipsum</p>'
+] = '<p>Lorem <a href="http://&lt;script language=&quot;application/javascript&quot;&gt;" rel="external nofollow noopener" target="_blank">&lt;script language=&quot;application/javascript&quot;&gt;</a> ipsum</p>'
 
 snapshots[
     "test_url_bbcode_link_text_is_escaped 1"
-] = '<p>Lorem <a href=\'http://&lt;script language="application/javascript"&gt;\' rel="external nofollow noopener" target="_blank">&lt;script language="application/javascript"&gt;</a> ipsum</p>'
+] = '<p>Lorem <a href="http://&lt;script language=&quot;application/javascript&quot;&gt;" rel="external nofollow noopener" target="_blank">&lt;script language=&quot;application/javascript&quot;&gt;</a> ipsum</p>'
 
 snapshots[
     "test_url_bbcode_with_link_text 1"

+ 2 - 2
misago/markup/tests/snapshots/snap_test_link_handling.py

@@ -21,7 +21,7 @@ snapshots[
 
 snapshots[
     "test_local_image_is_changed_to_relative_link 1"
-] = '<p>clean_links step cleans <img alt="example.com/media/img.png" src="/media/img.png"/></p>'
+] = '<p>clean_links step cleans <img alt="example.com/media/img.png" src="/media/img.png" /></p>'
 
 snapshots[
     "test_parser_converts_unmarked_links_to_hrefs 1"
@@ -33,7 +33,7 @@ snapshots[
 
 snapshots[
     "test_parser_skips_links_in_inline_code_bbcode 1"
-] = """<p>Lorem ipsum <br/>
+] = """<p>Lorem ipsum <br />
 </p><pre><code>http://test.com</code></pre><p></p>"""
 
 snapshots[

+ 22 - 22
misago/markup/tests/snapshots/snap_test_quote_bbcode.py

@@ -8,73 +8,73 @@ from snapshottest import Snapshot
 snapshots = Snapshot()
 
 snapshots[
-    "test_single_line_quote 1"
+    "test_multi_line_quote 1"
 ] = """<aside class="quote-block">
 <div class="quote-heading"></div>
 <blockquote class="quote-body">
 <p>Sit amet elit.</p>
+<p>Another line.</p>
 </blockquote>
 </aside>"""
 
 snapshots[
-    "test_single_line_authored_quote 1"
+    "test_quote_can_contain_bbcode_or_markdown 1"
 ] = """<aside class="quote-block">
-<div class="quote-heading">@Bob</div>
+<div class="quote-heading"></div>
 <blockquote class="quote-body">
-<p>Sit amet elit.</p>
+<p>Sit <strong>amet</strong> <u>elit</u>.</p>
 </blockquote>
 </aside>"""
 
 snapshots[
-    "test_single_line_authored_quote_without_quotations 1"
+    "test_quotes_can_be_nested 1"
 ] = """<aside class="quote-block">
-<div class="quote-heading">@Bob</div>
+<div class="quote-heading"></div>
 <blockquote class="quote-body">
 <p>Sit amet elit.</p>
+<aside class="quote-block">
+<div class="quote-heading"></div>
+<blockquote class="quote-body">
+<p>Nested quote</p>
+</blockquote>
+</aside>
 </blockquote>
 </aside>"""
 
 snapshots[
-    "test_quote_can_contain_bbcode_or_markdown 1"
+    "test_quotes_can_contain_hr_markdown 1"
 ] = """<aside class="quote-block">
 <div class="quote-heading"></div>
 <blockquote class="quote-body">
-<p>Sit <strong>amet</strong> <u>elit</u>.</p>
+<p>Sit amet elit.</p>
+<hr />
+<p>Another line.</p>
 </blockquote>
 </aside>"""
 
 snapshots[
-    "test_multi_line_quote 1"
+    "test_single_line_authored_quote 1"
 ] = """<aside class="quote-block">
-<div class="quote-heading"></div>
+<div class="quote-heading">@Bob</div>
 <blockquote class="quote-body">
 <p>Sit amet elit.</p>
-<p>Another line.</p>
 </blockquote>
 </aside>"""
 
 snapshots[
-    "test_quotes_can_be_nested 1"
+    "test_single_line_authored_quote_without_quotations 1"
 ] = """<aside class="quote-block">
-<div class="quote-heading"></div>
+<div class="quote-heading">@Bob</div>
 <blockquote class="quote-body">
 <p>Sit amet elit.</p>
-<aside class="quote-block">
-<div class="quote-heading"></div>
-<blockquote class="quote-body">
-<p>Nested quote</p>
-</blockquote>
-</aside>
 </blockquote>
 </aside>"""
 
 snapshots[
-    "test_quotes_can_contain_hr_markdown 1"
+    "test_single_line_quote 1"
 ] = """<aside class="quote-block">
 <div class="quote-heading"></div>
 <blockquote class="quote-body">
 <p>Sit amet elit.</p>
-<hr/>
-<p>Another line.</p>
 </blockquote>
 </aside>"""

+ 7 - 3
misago/markup/tests/snapshots/snap_test_short_image_markdown.py

@@ -9,14 +9,18 @@ snapshots = Snapshot()
 
 snapshots[
     "test_short_image_markdown[base] 1"
-] = '<p><img alt="somewhere.com/image.jpg" src="http://somewhere.com/image.jpg"/></p>'
-snapshots["test_short_image_markdown[space-one-word] 1"] = "<p>! (space)</p>"
+] = '<p><img alt="somewhere.com/image.jpg" src="http://somewhere.com/image.jpg" /></p>'
+
 snapshots[
     "test_short_image_markdown[space-multiple-words] 1"
 ] = "<p>! (space with other words)</p>"
+
+snapshots["test_short_image_markdown[space-one-word] 1"] = "<p>! (space)</p>"
+
 snapshots[
     "test_short_image_markdown[text-before-mark] 1"
-] = '<p>Text before exclamation mark<img alt="somewhere.com/image.jpg" src="http://somewhere.com/image.jpg"/></p>'
+] = '<p>Text before exclamation mark<img alt="somewhere.com/image.jpg" src="http://somewhere.com/image.jpg" /></p>'
+
 snapshots[
     "test_short_image_markdown[text-before-with-space] 1"
 ] = "<p>Text before with space in between! (sometext)</p>"

+ 8 - 8
misago/markup/tests/snapshots/snap_test_spoiler_bbcode.py

@@ -8,29 +8,29 @@ from snapshottest import Snapshot
 snapshots = Snapshot()
 
 snapshots[
-    "test_single_line_spoiler 1"
+    "test_multi_line_spoiler 1"
 ] = """<aside class="spoiler-block">
 <blockquote class="spoiler-body">
-<p>Daenerys and Jon live happily ever after!</p>
+<p>Sit amet elit.</p>
+<p>Another line.</p>
 </blockquote>
 <div class="spoiler-overlay"><button class="spoiler-reveal" type="button"></button></div>
 </aside>"""
 
 snapshots[
-    "test_spoiler_can_contain_bbcode_or_markdown 1"
+    "test_single_line_spoiler 1"
 ] = """<aside class="spoiler-block">
 <blockquote class="spoiler-body">
-<p>Sit <strong>amet</strong> <u>elit</u>.</p>
+<p>Daenerys and Jon live happily ever after!</p>
 </blockquote>
 <div class="spoiler-overlay"><button class="spoiler-reveal" type="button"></button></div>
 </aside>"""
 
 snapshots[
-    "test_multi_line_spoiler 1"
+    "test_spoiler_can_contain_bbcode_or_markdown 1"
 ] = """<aside class="spoiler-block">
 <blockquote class="spoiler-body">
-<p>Sit amet elit.</p>
-<p>Another line.</p>
+<p>Sit <strong>amet</strong> <u>elit</u>.</p>
 </blockquote>
 <div class="spoiler-overlay"><button class="spoiler-reveal" type="button"></button></div>
 </aside>"""
@@ -55,7 +55,7 @@ snapshots[
 ] = """<aside class="spoiler-block">
 <blockquote class="spoiler-body">
 <p>Sit amet elit.</p>
-<hr/>
+<hr />
 <p>Another line.</p>
 </blockquote>
 <div class="spoiler-overlay"><button class="spoiler-reveal" type="button"></button></div>

+ 5 - 5
misago/markup/tests/test_code_bbcode.py

@@ -3,7 +3,7 @@ from ..parser import parse
 
 def test_single_line_code(request_mock, user, snapshot):
     text = '[code]echo("Hello!");[/code]'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
@@ -15,23 +15,23 @@ alert("!")
 </script>
 [/code]
     """
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_code_with_language_parameter(request_mock, user, snapshot):
     text = '[code=php]echo("Hello!");[/code]'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_code_with_quoted_language_parameter(request_mock, user, snapshot):
     text = '[code="php"]echo("Hello!");[/code]'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_code_block_disables_parsing(request_mock, user, snapshot):
     text = "[code]Dolor [b]met.[/b][/code]"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])

+ 3 - 3
misago/markup/tests/test_code_md.py

@@ -3,7 +3,7 @@ from ..parser import parse
 
 def test_single_line_code_markdown(request_mock, user, snapshot):
     text = '```<script>alert("!")</script>```'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
@@ -15,7 +15,7 @@ alert("!")
 </script>
 ```
     """
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
@@ -27,5 +27,5 @@ alert("!")
 </script>
 ```
     """
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])

+ 10 - 4
misago/markup/tests/test_escaping.py

@@ -4,23 +4,29 @@ from ..parser import parse
 
 def test_text_is_escaped(request_mock, user, snapshot):
     text = '<script>alert("!")</script>'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_inline_code_is_escaped(request_mock, user, snapshot):
     text = '`<script>alert("!")</script>`'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_code_in_quote_markdown_is_escaped(request_mock, user, snapshot):
     text = '> <script>alert("!")</script>'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_code_in_quote_bbcode_is_escaped(request_mock, user, snapshot):
     text = '[quote]<script>alert("!")</script>[/quote]'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
+    snapshot.assert_match(result["parsed_text"])
+
+
+def test_code_in_quote_bbcode_header_is_escaped(request_mock, user, snapshot):
+    text = '[quote="@Us"><script>alert("!")</script>er"]Test[/quote]'
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])

+ 2 - 2
misago/markup/tests/test_hr_bbcode.py

@@ -7,11 +7,11 @@ Lorem ipsum dolor met.
 [hr]
 Sit amet elit.
 """
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_hr_bbcode_is_skipped_if_its_part_of_paragraph(request_mock, user, snapshot):
     text = "Lorem ipsum[hr]dolor met."
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert result["parsed_text"] == "<p>Lorem ipsum[hr]dolor met.</p>"

+ 69 - 0
misago/markup/tests/test_htmlparser.py

@@ -0,0 +1,69 @@
+from ..htmlparser import parse_html_string, print_html_string
+
+
+def test_parser_handles_simple_html():
+    root_node = parse_html_string("<p>Hello World!</p>")
+    assert print_html_string(root_node) == "<p>Hello World!</p>"
+
+
+def test_parser_handles_html_with_brs():
+    root_node = parse_html_string("<p>Hello<br />World!</p>")
+    assert print_html_string(root_node) == "<p>Hello<br />World!</p>"
+
+
+def test_parser_handles_html_with_hrs():
+    root_node = parse_html_string("<p>Hello</p><hr /><p>World!</p>")
+    assert print_html_string(root_node) == "<p>Hello</p><hr /><p>World!</p>"
+
+
+def test_parser_escapes_html_in_text_nodes():
+    root_node = parse_html_string("<span>Hello &lt;br&gt; World!</span>")
+    assert print_html_string(root_node) == "<span>Hello &lt;br&gt; World!</span>"
+
+
+def test_parser_escapes_quotes_in_text_nodes():
+    root_node = parse_html_string('<span>Hello "World"!</span>')
+    assert print_html_string(root_node) == "<span>Hello &quot;World&quot;!</span>"
+
+
+def test_parser_handles_attributes():
+    root_node = parse_html_string('<a href="/hello-world/">Hello World!</a>')
+    assert print_html_string(root_node) == '<a href="/hello-world/">Hello World!</a>'
+
+
+def test_parser_escapes_html_in_attributes_names():
+    root_node = parse_html_string('<span data-a<tt>r="<br>">Hello!</span>')
+    assert print_html_string(root_node) == (
+        "<span data-a&lt;tt>r=&quot;<br />&quot;&gt;Hello!</span>"
+    )
+
+
+def test_parser_escapes_quotes_in_attributes_names():
+    root_node = parse_html_string('<span "data-attr"="br">Hello!</span>')
+    assert print_html_string(root_node) == (
+        '<span &quot;data-attr&quot;="br">Hello!</span>'
+    )
+
+
+def test_parser_escapes_html_in_attributes_values():
+    root_node = parse_html_string('<span data-attr="<br>">Hello!</span>')
+    assert print_html_string(root_node) == (
+        '<span data-attr="&lt;br&gt;">Hello!</span>'
+    )
+
+
+def test_parser_handles_escaped_attribute_values():
+    root_node = parse_html_string('<span data-attr="&lt;br&gt;">Hello!</span>')
+    assert print_html_string(root_node) == (
+        '<span data-attr="&lt;br&gt;">Hello!</span>'
+    )
+
+
+def test_parser_escapes_quotes_in_attributes_values():
+    root_node = parse_html_string('<span data-attr="\'">Hello!</span>')
+    assert print_html_string(root_node) == ('<span data-attr="&#x27;">Hello!</span>')
+
+
+def test_parser_handles_bool_attributes():
+    root_node = parse_html_string("<button disabled>Hello World!</button>")
+    assert print_html_string(root_node) == "<button disabled>Hello World!</button>"

+ 16 - 16
misago/markup/tests/test_inline_bbcode.py

@@ -3,91 +3,91 @@ from ..parser import parse
 
 def test_bold_bbcode(request_mock, user, snapshot):
     text = "Lorem [b]ipsum[/b]!"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_italics_bbcode(request_mock, user, snapshot):
     text = "Lorem [i]ipsum[/i]!"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_underline_bbcode(request_mock, user, snapshot):
     text = "Lorem [u]ipsum[/u]!"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_simple_inline_bbcode_is_escaped(request_mock, user, snapshot):
     text = 'Lorem [b]ips <script language="application/javascript"> um[/b]!'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_inline_bbcode_can_be_mixed(request_mock, user, snapshot):
     text = "Lorem [b][u]ipsum[/u][/b]!"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_inline_bbcode_can_be_mixed_with_markdown(request_mock, user, snapshot):
     text = "Lorem [b]**ipsum**[/b]!"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_image_bbcode(request_mock, user, snapshot):
     text = "Lorem [img]https://placekitten.com/g/1200/500[/img] ipsum"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_image_bbcode_is_escaped(request_mock, user, snapshot):
     text = 'Lorem [img]<script language="application/javascript">[/img] ipsum'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_image_bbcode_is_case_insensitive(request_mock, user, snapshot):
     text = "Lorem [iMg]https://placekitten.com/g/1200/500[/ImG] ipsum"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_url_bbcode(request_mock, user, snapshot):
     text = "Lorem [url]https://placekitten.com/g/1200/500[/url] ipsum"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_url_bbcode_is_escaped(request_mock, user, snapshot):
     text = 'Lorem [url]<script language="application/javascript">[/url] ipsum'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_url_bbcode_with_link_text(request_mock, user, snapshot):
     text = "Lorem [url=https://placekitten.com/g/1200/500]dolor[/url] ipsum"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_url_bbcode_with_long_link_text(request_mock, user, snapshot):
     text = "Lorem [url=https://placekitten.com/g/1200/500]dolor met[/url] ipsum"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_url_bbcode_with_quotes_and_link_text(request_mock, user, snapshot):
     text = 'Lorem [url="https://placekitten.com/g/1200/500"]dolor[/url] ipsum'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_url_bbcode_with_quotes_and_long_link_text(request_mock, user, snapshot):
     text = 'Lorem [url="https://placekitten.com/g/1200/500"]dolor met[/url] ipsum'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
@@ -98,5 +98,5 @@ def test_url_bbcode_link_text_is_escaped(request_mock, user, snapshot):
         "[/url] ipsum"
     )
 
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])

+ 22 - 22
misago/markup/tests/test_link_handling.py

@@ -3,19 +3,19 @@ from ..parser import parse
 
 def test_parser_converts_unmarked_links_to_hrefs(request_mock, user, snapshot):
     text = "Lorem ipsum http://test.com"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_parser_skips_links_in_inline_code_markdown(request_mock, user, snapshot):
     text = "Lorem ipsum `http://test.com`"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_parser_skips_links_in_inline_code_bbcode(request_mock, user, snapshot):
     text = "Lorem ipsum [code]http://test.com[/code]"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
@@ -25,7 +25,7 @@ def test_parser_skips_links_in_code_bbcode(request_mock, user, snapshot):
 http://test.com
 [/code]
     """
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
@@ -33,13 +33,13 @@ def test_absolute_link_to_site_is_changed_to_relative_link(
     request_mock, user, snapshot
 ):
     text = "clean_links step cleans http://example.com"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_absolute_link_to_site_is_added_to_internal_links_list(request_mock, user):
     text = "clean_links step cleans http://example.com"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert result["internal_links"] == ["/"]
 
 
@@ -47,7 +47,7 @@ def test_absolute_link_to_site_without_schema_is_changed_to_relative_link(
     request_mock, user, snapshot
 ):
     text = "clean_links step cleans example.com"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
@@ -55,7 +55,7 @@ def test_absolute_link_to_site_without_schema_is_added_to_internal_links_list(
     request_mock, user
 ):
     text = "clean_links step cleans example.com"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert result["internal_links"] == ["/"]
 
 
@@ -63,7 +63,7 @@ def test_absolute_link_with_path_to_site_is_changed_to_relative_link(
     request_mock, user, snapshot
 ):
     text = "clean_links step cleans http://example.com/somewhere-something/"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
@@ -71,25 +71,25 @@ def test_absolute_link_with_path_to_site_is_added_to_internal_links_list(
     request_mock, user
 ):
     text = "clean_links step cleans http://example.com/somewhere-something/"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert result["internal_links"] == ["/somewhere-something/"]
 
 
 def test_full_link_with_path_text_is_set_to_domain_and_path(request_mock, user):
     text = "clean_links step cleans http://example.com/somewhere-something/"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert ">example.com/somewhere-something/<" in result["parsed_text"]
 
 
 def test_outgoing_link_is_added_to_outgoing_links_list(request_mock, user):
     text = "clean_links step cleans https://other.com"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert result["outgoing_links"] == ["other.com"]
 
 
 def test_outgoing_llink_includes_external_nofollow_and_noopener(request_mock, user):
     text = "Lorem [url]https://placekitten.com/g/1200/500[/url] ipsum"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert 'rel="external nofollow noopener"' in result["parsed_text"]
 
 
@@ -97,44 +97,44 @@ def test_outgoing_link_without_scheme_is_added_to_outgoing_links_list(
     request_mock, user
 ):
     text = "clean_links step cleans other.com"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert result["outgoing_links"] == ["other.com"]
 
 
 def test_outgoing_link_with_path_is_added_to_outgoing_links_list(request_mock, user):
     text = "clean_links step cleans other.com/some/path/"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert result["outgoing_links"] == ["other.com/some/path/"]
 
 
 def test_local_image_is_changed_to_relative_link(request_mock, user, snapshot):
     text = "clean_links step cleans !(example.com/media/img.png)"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_local_image_is_added_to_images_list(request_mock, user):
     text = "clean_links step cleans !(example.com/media/img.png)"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert result["images"] == ["/media/img.png"]
 
 
 def test_remote_image_is_added_to_images_list(request_mock, user):
     text = "clean_links step cleans !(other.com/media/img.png)"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert result["images"] == ["other.com/media/img.png"]
 
 
 def test_local_image_link_is_added_to_images_and_links_lists(request_mock, user):
     text = "clean_links step cleans [!(example.com/media/img.png)](example.com/test/)"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert result["internal_links"] == ["/test/"]
     assert result["images"] == ["/media/img.png"]
 
 
 def test_remote_image_link_is_added_to_images_and_links_lists(request_mock, user):
     text = "clean_links step cleans [!(other.com/media/img.png)](other.com/test/)"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert result["outgoing_links"] == ["other.com/test/"]
     assert result["images"] == ["other.com/media/img.png"]
 
@@ -143,7 +143,7 @@ def test_parser_adds_shva_to_attachment_link_querystring_if_force_option_is_enab
     request_mock, user
 ):
     text = "clean_links step cleans ![3.png](http://example.com/a/thumb/test/43/)"
-    result = parse(text, request_mock, user, minify=False, force_shva=True)
+    result = parse(text, request_mock, user, force_shva=True)
     assert "/a/thumb/test/43/?shva=1" in result["parsed_text"]
 
 
@@ -151,5 +151,5 @@ def test_parser_skips_shva_in_attachment_link_querystring_if_force_option_is_omi
     request_mock, user
 ):
     text = "clean_links step cleans ![3.png](http://example.com/a/thumb/test/43/)"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     assert "?shva=1" not in result["parsed_text"]

+ 52 - 21
misago/markup/tests/test_mentions.py

@@ -1,36 +1,51 @@
+from ..htmlparser import parse_html_string, print_html_string
 from ..mentions import add_mentions
 
 
-def test_util_replaces_mention_with_link_to_user_profile_in_parsed_text(
-    request_mock, user
-):
+def test_util_replaces_mention_with_link_to_user_profile_in_parsed_text(user):
     parsing_result = {"parsed_text": f"<p>Hello, @{user.username}!</p>", "mentions": []}
-    add_mentions(request_mock, parsing_result)
+    root_node = parse_html_string(parsing_result["parsed_text"])
+
+    add_mentions(parsing_result, root_node)
+
+    parsing_result["parsed_text"] = print_html_string(root_node)
     assert parsing_result["parsed_text"] == (
         f'<p>Hello, <a href="{user.get_absolute_url()}">@{user.username}</a>!</p>'
     )
 
 
-def test_util_adds_mention_to_parsig_result(request_mock, user):
+def test_util_adds_mention_to_parsig_result(user):
     parsing_result = {"parsed_text": f"<p>Hello, @{user.username}!</p>", "mentions": []}
-    add_mentions(request_mock, parsing_result)
-    assert parsing_result["mentions"] == [user]
+    root_node = parse_html_string(parsing_result["parsed_text"])
+
+    add_mentions(parsing_result, root_node)
+
+    parsing_result["parsed_text"] = print_html_string(root_node)
+    assert parsing_result["mentions"] == [user.id]
 
 
-def test_mentions_arent_added_for_nonexisting_user(request_mock, user):
+def test_mentions_arent_added_for_nonexisting_user(user):
     parsing_result = {"parsed_text": f"<p>Hello, @OtherUser!</p>", "mentions": []}
-    add_mentions(request_mock, parsing_result)
+    root_node = parse_html_string(parsing_result["parsed_text"])
+
+    add_mentions(parsing_result, root_node)
+
+    parsing_result["parsed_text"] = print_html_string(root_node)
     assert parsing_result["parsed_text"] == "<p>Hello, @OtherUser!</p>"
 
 
 def test_util_replaces_multiple_mentions_with_link_to_user_profiles_in_parsed_text(
-    request_mock, user, other_user
+    user, other_user
 ):
     parsing_result = {
         "parsed_text": f"<p>Hello, @{user.username} and @{other_user.username}!</p>",
         "mentions": [],
     }
-    add_mentions(request_mock, parsing_result)
+    root_node = parse_html_string(parsing_result["parsed_text"])
+
+    add_mentions(parsing_result, root_node)
+
+    parsing_result["parsed_text"] = print_html_string(root_node)
     assert (
         f'<a href="{user.get_absolute_url()}">@{user.username}</a>'
         in parsing_result["parsed_text"]
@@ -41,38 +56,54 @@ def test_util_replaces_multiple_mentions_with_link_to_user_profiles_in_parsed_te
     )
 
 
-def test_util_adds_multiple_mentions_to_parsig_result(request_mock, user, other_user):
+def test_util_adds_multiple_mentions_to_parsig_result(user, other_user):
     parsing_result = {
         "parsed_text": f"<p>Hello, @{user.username} and @{other_user.username}!</p>",
         "mentions": [],
     }
-    add_mentions(request_mock, parsing_result)
-    assert parsing_result["mentions"] == [user, other_user]
+    root_node = parse_html_string(parsing_result["parsed_text"])
 
+    add_mentions(parsing_result, root_node)
 
-def test_util_handles_repeated_mentions_of_same_user(request_mock, user):
+    parsing_result["parsed_text"] = print_html_string(root_node)
+    assert parsing_result["mentions"] == [user.id, other_user.id]
+
+
+def test_util_handles_repeated_mentions_of_same_user(user):
     parsing_result = {
         "parsed_text": f"<p>Hello, @{user.username} and @{user.username}!</p>",
         "mentions": [],
     }
-    add_mentions(request_mock, parsing_result)
-    assert parsing_result["mentions"] == [user]
+    root_node = parse_html_string(parsing_result["parsed_text"])
+
+    add_mentions(parsing_result, root_node)
+
+    parsing_result["parsed_text"] = print_html_string(root_node)
+    assert parsing_result["mentions"] == [user.id]
 
 
-def test_util_skips_mentions_in_links(request_mock, user, snapshot):
+def test_util_skips_mentions_in_links(user, snapshot):
     parsing_result = {
         "parsed_text": f'<p>Hello, <a href="/">@{user.username}</a></p>',
         "mentions": [],
     }
-    add_mentions(request_mock, parsing_result)
+    root_node = parse_html_string(parsing_result["parsed_text"])
+
+    add_mentions(parsing_result, root_node)
+
+    parsing_result["parsed_text"] = print_html_string(root_node)
     assert parsing_result["parsed_text"] == (
         f'<p>Hello, <a href="/">@{user.username}</a></p>'
     )
     assert parsing_result["mentions"] == []
 
 
-def test_util_handles_text_without_mentions(request_mock):
+def test_util_handles_text_without_mentions(db):
     parsing_result = {"parsed_text": f"<p>Hello, world!</p>", "mentions": []}
-    add_mentions(request_mock, parsing_result)
+    root_node = parse_html_string(parsing_result["parsed_text"])
+
+    add_mentions(parsing_result, root_node)
+
+    parsing_result["parsed_text"] = print_html_string(root_node)
     assert parsing_result["parsed_text"] == ("<p>Hello, world!</p>")
     assert parsing_result["mentions"] == []

+ 0 - 17
misago/markup/tests/test_parser.py

@@ -1,17 +0,0 @@
-from ..parser import parse
-
-
-def test_html_is_escaped(request_mock, user, snapshot):
-    text = "Lorem <strong>ipsum!</strong>"
-    result = parse(text, request_mock, user, minify=True)
-    snapshot.assert_match(result["parsed_text"])
-
-
-def test_parsed_text_is_minified(request_mock, user, snapshot):
-    text = """
-Lorem **ipsum** dolor met.
-
-Sit amet elit.
-"""
-    result = parse(text, request_mock, user, minify=True)
-    snapshot.assert_match(result["parsed_text"])

+ 7 - 7
misago/markup/tests/test_quote_bbcode.py

@@ -3,25 +3,25 @@ from ..parser import parse
 
 def test_single_line_quote(request_mock, user, snapshot):
     text = "[quote]Sit amet elit.[/quote]"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_single_line_authored_quote(request_mock, user, snapshot):
     text = '[quote="@Bob"]Sit amet elit.[/quote]'
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_single_line_authored_quote_without_quotations(request_mock, user, snapshot):
     text = "[quote=@Bob]Sit amet elit.[/quote]"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_quote_can_contain_bbcode_or_markdown(request_mock, user, snapshot):
     text = "[quote]Sit **amet** [u]elit[/u].[/quote]"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
@@ -33,7 +33,7 @@ Sit amet elit.
 Another line.
 [/quote]
 """
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
@@ -44,7 +44,7 @@ Sit amet elit.
 [quote]Nested quote[/quote]
 [/quote]
 """
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
@@ -57,5 +57,5 @@ Sit amet elit.
 Another line.
 [/quote]
 """
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])

+ 1 - 1
misago/markup/tests/test_short_image_markdown.py

@@ -19,5 +19,5 @@ from ..parser import parse
     ],
 )
 def test_short_image_markdown(request_mock, user, snapshot, text):
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])

+ 5 - 5
misago/markup/tests/test_spoiler_bbcode.py

@@ -3,13 +3,13 @@ from ..parser import parse
 
 def test_single_line_spoiler(request_mock, user, snapshot):
     text = "[spoiler]Daenerys and Jon live happily ever after![/spoiler]"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
 def test_spoiler_can_contain_bbcode_or_markdown(request_mock, user, snapshot):
     text = "[spoiler]Sit **amet** [u]elit[/u].[/spoiler]"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
@@ -21,7 +21,7 @@ Sit amet elit.
 Another line.
 [/spoiler]
 """
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
@@ -32,7 +32,7 @@ Sit amet elit.
 [spoiler]Nested spoiler[/spoiler]
 [/spoiler]
 """
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])
 
 
@@ -45,5 +45,5 @@ Sit amet elit.
 Another line.
 [/spoiler]
 """
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])

+ 1 - 1
misago/markup/tests/test_strikethrough_markdown.py

@@ -3,5 +3,5 @@ from ..parser import parse
 
 def test_strikethrough_markdown(request_mock, user, snapshot):
     text = "Lorem ~~ipsum~~ dolor met!"
-    result = parse(text, request_mock, user, minify=False)
+    result = parse(text, request_mock, user)
     snapshot.assert_match(result["parsed_text"])

+ 3 - 3
misago/threads/api/postingendpoint/mentions.py

@@ -8,9 +8,9 @@ class MentionsMiddleware(PostingMiddleware):
             existing_mentions = self.get_existing_mentions()
 
         new_mentions = []
-        for user in self.post.parsing_result["mentions"]:
-            if user.pk not in existing_mentions:
-                new_mentions.append(user)
+        for user_pk in self.post.parsing_result["mentions"]:
+            if user_pk not in existing_mentions:
+                new_mentions.append(user_pk)
 
         if new_mentions:
             self.post.mentions.add(*new_mentions)

+ 2 - 3
misago/threads/tests/test_post_mentions.py

@@ -55,7 +55,7 @@ class PostMentionsTests(AuthenticatedUserTestCase):
         self.assertEqual(post.mentions.all()[0], self.user)
 
     def test_mention_limit(self):
-        """endpoint mentions limits mentions to 24 users"""
+        """endpoint mentions over limit results in no mentions set"""
         users = []
 
         for i in range(MENTIONS_LIMIT + 5):
@@ -70,8 +70,7 @@ class PostMentionsTests(AuthenticatedUserTestCase):
 
         post = self.user.post_set.order_by("id").last()
 
-        self.assertEqual(post.mentions.count(), 24)
-        self.assertEqual(list(post.mentions.order_by("id")), users[:24])
+        self.assertEqual(post.mentions.count(), 0)
 
     def test_mention_update(self):
         """edit post endpoint updates mentions"""

+ 0 - 3
requirements.in

@@ -1,13 +1,10 @@
 ariadne
 ariadne_django
-beautifulsoup4<4.8
-bleach
 celery[redis]
 coveralls
 django<4
 djangorestframework
 django-debug-toolbar
-django-htmlmin
 django-mptt
 django-simple-sso
 Faker

+ 2 - 17
requirements.txt

@@ -20,14 +20,8 @@ async-timeout==4.0.2
     # via redis
 attrs==22.1.0
     # via pytest
-beautifulsoup4==4.7.1
-    # via
-    #   -r requirements.in
-    #   django-htmlmin
 billiard==3.6.4.0
     # via celery
-bleach==5.0.1
-    # via -r requirements.in
 celery[redis]==5.2.7
     # via -r requirements.in
 certifi==2022.6.15
@@ -73,8 +67,6 @@ django==3.2.15
     #   webservices
 django-debug-toolbar==3.5.0
     # via -r requirements.in
-django-htmlmin==0.11.0
-    # via -r requirements.in
 django-js-asset==2.0.0
     # via django-mptt
 django-mptt==0.13.4
@@ -92,9 +84,7 @@ fastdiff==0.3.0
 graphql-core==3.2.1
     # via ariadne
 html5lib==1.1
-    # via
-    #   -r requirements.in
-    #   django-htmlmin
+    # via -r requirements.in
 idna==3.3
     # via
     #   anyio
@@ -173,7 +163,6 @@ responses==0.21.0
     # via -r requirements.in
 six==1.16.0
     # via
-    #   bleach
     #   click-repl
     #   html5lib
     #   python-dateutil
@@ -186,8 +175,6 @@ social-auth-app-django==5.0.0
     # via -r requirements.in
 social-auth-core==4.3.0
     # via social-auth-app-django
-soupsieve==2.3.2.post1
-    # via beautifulsoup4
 sqlparse==0.4.2
     # via
     #   django
@@ -220,9 +207,7 @@ wasmer-compiler-cranelift==1.1.0
 wcwidth==0.2.5
     # via prompt-toolkit
 webencodings==0.5.1
-    # via
-    #   bleach
-    #   html5lib
+    # via html5lib
 webservices[django]==0.7
     # via django-simple-sso
 wrapt==1.14.1