mentions.py 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. import re
  2. from typing import Union
  3. from django.contrib.auth import get_user_model
  4. from .htmlparser import (
  5. ElementNode,
  6. RootNode,
  7. TextNode,
  8. )
  # Element tags whose contents are never scanned for mentions; find_mentions
  # returns immediately for an ElementNode carrying one of these tags.
  EXCLUDE_ELEMENTS = ("pre", "code", "a")

  # A mention is an "@" followed by one or more ASCII letters or digits,
  # matched case-insensitively.
  USERNAME_RE = re.compile(r"@[0-9a-z]+", flags=re.IGNORECASE)

  # add_mentions leaves the document untouched when it contains more unique
  # mentions than this.
  MENTIONS_LIMIT = 32
  def add_mentions(result, root_node):
      """Replace mentions of existing users under ``root_node`` with links.

      ``result`` is a mapping whose ``"parsed_text"`` entry is checked for an
      "@" before any other work is done. When at least one mentioned user is
      found by ``get_users_data``, the affected nodes are rewritten in place
      and ``result["mentions"]`` is set to the list of those users' ids.

      Returns ``None`` in every case; ``result`` and the node tree are
      modified in place.
      """
      # Cheap pre-check: without an "@" there cannot be any mention.
      if "@" not in result["parsed_text"]:
          return

      found_slugs = set()
      nodes_with_mentions = []
      find_mentions(root_node, found_slugs, nodes_with_mentions)

      # Nothing found, or more unique mentions than allowed: leave as is.
      if not (0 < len(found_slugs) <= MENTIONS_LIMIT):
          return

      known_users = get_users_data(found_slugs)
      if not known_users:
          return  # Mentioned users don't exist

      for mention_node in nodes_with_mentions:
          add_mentions_to_node(mention_node, known_users)

      # Each entry is a (user_id, username) tuple; only the ids are exposed.
      result["mentions"] = [entry[0] for entry in known_users.values()]
  def find_mentions(
      node: Union[ElementNode, RootNode],
      mentions: set,
      nodes: list,
  ) -> None:
      """Recursively collect mentioned names and the nodes that contain them.

      Walks ``node.children`` depth-first. Each ``TextNode`` child is scanned
      with ``find_mentions_in_str``; any other child is searched recursively.
      An ``ElementNode`` whose tag is listed in ``EXCLUDE_ELEMENTS`` is
      skipped together with everything below it.

      Args:
          node: Node whose children are searched.
          mentions: Set updated in place with the names that were found.
          nodes: List extended in place with every node that has at least
              one text child containing a mention. A node is appended at
              most once per call, however many of its text children match.
      """
      if isinstance(node, ElementNode) and node.tag in EXCLUDE_ELEMENTS:
          return

      tracked_node = False
      for child in node.children:
          if not isinstance(child, TextNode):
              find_mentions(child, mentions, nodes)
              continue

          results = find_mentions_in_str(child.text)
          if not results:
              continue

          mentions.update(results)
          if not tracked_node:
              # Record the parent only once so it is not rewritten twice.
              tracked_node = True
              nodes.append(node)
  def find_mentions_in_str(text: str) -> Union[set, None]:
      """Return the unique names mentioned in ``text``.

      Every ``USERNAME_RE`` match is stripped of its leading "@" and
      lowercased. Returns ``None`` (not an empty set) when ``text`` contains
      no match.
      """
      matches = USERNAME_RE.findall(text)
      if not matches:
          return None

      # Build the set directly instead of via an intermediate list.
      return {match[1:].lower() for match in matches}
  def get_users_data(mentions):
      """Look up users whose slug is one of ``mentions``.

      Queries the model returned by ``get_user_model()`` and returns a dict
      mapping each matching user's slug to a ``(user_id, username)`` tuple.
      Slugs without a matching user are simply absent from the result.
      """
      rows = (
          get_user_model()
          .objects.filter(slug__in=mentions)
          .values_list("id", "username", "slug")
      )
      return {slug: (user_id, username) for user_id, username, slug in rows}
  def add_mentions_to_node(node, users_data):
      """Rebuild ``node.children`` with mentions in its text children expanded.

      Each ``TextNode`` child is replaced by the nodes returned from
      ``add_mentions_to_text``; every other child is kept unchanged and in
      its original position. ``node.children`` is reassigned to the new list.
      """
      rebuilt = []
      for child in node.children:
          if not isinstance(child, TextNode):
              rebuilt.append(child)
              continue
          rebuilt.extend(add_mentions_to_text(child.text, users_data))

      node.children = rebuilt
  def add_mentions_to_text(text: str, users_data):
      """Split ``text`` into nodes, turning mentions of known users into links.

      ``users_data`` is looked up by the lowercased name of each
      ``USERNAME_RE`` match (the match without its leading "@") and must
      yield a ``(user_id, username)`` tuple. A match found in ``users_data``
      becomes an ``<a>`` ``ElementNode``; a match that is not found, and all
      text between matches, is kept as ``TextNode`` instances.

      Returns the list of nodes in their original order. Empty ``text``
      yields an empty list.
      """
      nodes = []

      while True:
          match = USERNAME_RE.search(text)
          if not match:
              # No more mentions: keep whatever text is left and finish.
              if text:
                  nodes.append(TextNode(text=text))
              return nodes

          start, end = match.span()
          user_slug = text[start + 1 : end].lower()

          # Append text between 0 and start to nodes
          if start > 0:
              nodes.append(TextNode(text=text[:start]))

          # Unknown user: keep only the matched string as plain text and keep
          # scanning. The text before it was already appended above, so the
          # slice must start at ``start`` or that prefix would be duplicated.
          if user_slug not in users_data:
              nodes.append(TextNode(text=text[start:end]))
              text = text[end:]
              continue

          user_id, username = users_data[user_slug]
          nodes.append(
              ElementNode(
                  tag="a",
                  attrs={
                      "href": f"/u/{user_slug}/{user_id}/",
                      "data-quote": f"@{username}",
                  },
                  children=[TextNode(text=f"@{username}")],
              )
          )

          text = text[end:]