test_htmlparser.py 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. from ..htmlparser import parse_html_string, print_html_string
  2. def test_parser_handles_simple_html():
  3. root_node = parse_html_string("<p>Hello World!</p>")
  4. assert print_html_string(root_node) == "<p>Hello World!</p>"
  5. def test_parser_handles_html_with_brs():
  6. root_node = parse_html_string("<p>Hello<br />World!</p>")
  7. assert print_html_string(root_node) == "<p>Hello<br />World!</p>"
  8. def test_parser_handles_html_with_hrs():
  9. root_node = parse_html_string("<p>Hello</p><hr /><p>World!</p>")
  10. assert print_html_string(root_node) == "<p>Hello</p><hr /><p>World!</p>"
  11. def test_parser_escapes_html_in_text_nodes():
  12. root_node = parse_html_string("<span>Hello &lt;br&gt; World!</span>")
  13. assert print_html_string(root_node) == "<span>Hello &lt;br&gt; World!</span>"
  14. def test_parser_escapes_quotes_in_text_nodes():
  15. root_node = parse_html_string('<span>Hello "World"!</span>')
  16. assert print_html_string(root_node) == "<span>Hello &quot;World&quot;!</span>"
  17. def test_parser_handles_attributes():
  18. root_node = parse_html_string('<a href="/hello-world/">Hello World!</a>')
  19. assert print_html_string(root_node) == '<a href="/hello-world/">Hello World!</a>'
  20. def test_parser_escapes_html_in_attributes_names():
  21. root_node = parse_html_string('<span data-a<tt>r="<br>">Hello!</span>')
  22. assert print_html_string(root_node) == (
  23. "<span data-a&lt;tt>r=&quot;<br />&quot;&gt;Hello!</span>"
  24. )
  25. def test_parser_escapes_quotes_in_attributes_names():
  26. root_node = parse_html_string('<span "data-attr"="br">Hello!</span>')
  27. assert print_html_string(root_node) == (
  28. '<span &quot;data-attr&quot;="br">Hello!</span>'
  29. )
  30. def test_parser_escapes_html_in_attributes_values():
  31. root_node = parse_html_string('<span data-attr="<br>">Hello!</span>')
  32. assert print_html_string(root_node) == (
  33. '<span data-attr="&lt;br&gt;">Hello!</span>'
  34. )
  35. def test_parser_handles_escaped_attribute_values():
  36. root_node = parse_html_string('<span data-attr="&lt;br&gt;">Hello!</span>')
  37. assert print_html_string(root_node) == (
  38. '<span data-attr="&lt;br&gt;">Hello!</span>'
  39. )
  40. def test_parser_escapes_quotes_in_attributes_values():
  41. root_node = parse_html_string('<span data-attr="\'">Hello!</span>')
  42. assert print_html_string(root_node) == ('<span data-attr="&#x27;">Hello!</span>')
  43. def test_parser_handles_bool_attributes():
  44. root_node = parse_html_string("<button disabled>Hello World!</button>")
  45. assert print_html_string(root_node) == "<button disabled>Hello World!</button>"