from HTMLParser import HTMLParser
from urlparse import urlparse
from misago.utils.strings import random_string
class RemoveHTMLParser(HTMLParser):
def __init__(self):
HTMLParser.__init__(self)
self.clean_text = ''
self.lookback = []
def handle_entityref(self, name):
if name == 'gt':
self.clean_text += '>'
if name == 'lt':
self.clean_text += '<'
def handle_starttag(self, tag, attrs):
if tag == 'img':
self.handle_startendtag(tag, attrs)
else:
self.lookback.append(tag)
def handle_endtag(self, tag):
try:
if self.lookback[-1] == tag:
self.lookback.pop()
except IndexError:
pass
def handle_startendtag(self, tag, attrs):
try:
if tag == 'img':
for attr in attrs:
if attr[0] == 'alt':
self.clean_text += attr[1]
break
except KeyError:
pass
def handle_data(self, data):
# String does not repeat itself
if self.clean_text[-len(data):] != data:
# String is not "QUOTE"
try:
if self.lookback[-1] in ('strong', 'em'):
self.clean_text += data
elif not (data == 'Quote' and self.lookback[-1] == 'h3' and self.lookback[-2] == 'blockquote'):
self.clean_text += data
except IndexError:
self.clean_text += data