parsers.py 1.2 KB

1234567891011121314151617181920212223242526272829303132333435363738
  1. from HTMLParser import HTMLParser
  2. from urlparse import urlparse
  3. from django.conf import settings
  4. from misago.utils.strings import random_string
  5. class RemoveHTMLParser(HTMLParser):
  6. def __init__(self):
  7. HTMLParser.__init__(self)
  8. self.clean_text = ''
  9. self.lookback = []
  10. def handle_entityref(self, name):
  11. if name == 'gt':
  12. self.clean_text += '>'
  13. if name == 'lt':
  14. self.clean_text += '<'
  15. def handle_starttag(self, tag, attrs):
  16. self.lookback.append(tag)
  17. def handle_endtag(self, tag):
  18. try:
  19. if self.lookback[-1] == tag:
  20. self.lookback.pop()
  21. except IndexError:
  22. pass
  23. def handle_data(self, data):
  24. # String does not repeat itself
  25. if self.clean_text[-len(data):] != data:
  26. # String is not "QUOTE"
  27. try:
  28. if self.lookback[-1] in ('strong', 'em'):
  29. self.clean_text += data
  30. elif not (data == 'Quote' and self.lookback[-1] == 'h3' and self.lookback[-2] == 'blockquote'):
  31. self.clean_text += data
  32. except IndexError:
  33. self.clean_text += data