12 years ago · d6a29829e0
--- a/misago/markdown/extensions/cleanlinks.py
+++ b/misago/markdown/extensions/cleanlinks.py
@@ -0,0 +1,37 @@
 
															+import markdown

														
 
															+from markdown.util import etree

														
 
															+from misago.utils.urls import is_url, is_inner, clean_inner

														
 
															+

														
 
															+class CleanLinksExtension(markdown.Extension):

														
 
															+    def extendMarkdown(self, md):

														
 
															+        md.registerExtension(self)

														
 
															+        md.treeprocessors.add('mi_cleanlinks',

														
 
															+                              CleanLinksTreeprocessor(md),

														
 
															+                              '_end')

														
 
															+

														
 
															+

														
 
															+class CleanLinksTreeprocessor(markdown.treeprocessors.Treeprocessor):

														
 
															+    def run(self, root):

														
 
															+        self.inurl = False

														
 
															+        return self.walk_tree(root)

														
 
															+

														
 
															+    def walk_tree(self, node):

														
 
															+        if node.tag == 'a':

														
 
															+            self.inurl = True

														
 
															+            if is_inner(node.get('href')):

														
 
															+                node.set('href', clean_inner(node.get('href')))

														
 
															+            else:

														
 
															+                node.set('rel', 'nofollow')

														
 
															+        if node.tag == 'img':

														
 
															+            if is_inner(node.get('src')):

														
 
															+                node.set('src', '%s' % clean_inner(node.get('src')))

														
 
															+

														
 
															+        try:

														
 
															+            if self.inurl and is_url(node.text):

														
 
															+                node.text = clean_inner(node.text)[1:]

														
 
															+        except TypeError:

														
 
															+            pass

														
 
															+            

														
 
															+        for i in node:

														
 
															+            self.walk_tree(i)

														
 
															+        self.inurl = False

														
--- a/misago/markdown/extensions/magiclinks.py
+++ b/misago/markdown/extensions/magiclinks.py
@@ -4,6 +4,8 @@ import markdown
 
															 from markdown.inlinepatterns import LinkPattern

														
 
															 from markdown.postprocessors import RawHtmlPostprocessor

														
 
															 from markdown.util import etree

														
 
															+from misago.utils.strings import html_escape

														
 
															+from misago.utils.urls import is_inner, clean_inner

														
 
															 # Global vars

														
 
															 MAGICLINKS_RE = re.compile(r'(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))', re.UNICODE)

														
@@ -25,8 +27,11 @@ class MagicLinksTreeprocessor(markdown.treeprocessors.Treeprocessor):
 
															             link = LinkPattern(MAGICLINKS_RE, self.markdown)

														
 
															             href = link.sanitize_url(link.unescape(matchobj.group(0).strip()))

														
 
															             if href:

														
 
															-                href = self.escape(href)

														
 
															-                return self.markdown.htmlStash.store('<a href="%(href)s">%(href)s</a>' % {'href': href}, safe=True)

														
 
															+                if is_inner(href):

														
 
															+                    clean = clean_inner(href)

														
 
															+                    return self.markdown.htmlStash.store('<a href="%s">%s</a>' % (clean, clean[1:]), safe=True)

														
 
															+                else:

														
 
															+                    return self.markdown.htmlStash.store('<a href="%(href)s" rel="nofollow">%(href)s</a>' % {'href': href}, safe=True)

														
 
															             else:

														
 
															                 return matchobj.group(0)

														
@@ -36,10 +41,4 @@ class MagicLinksTreeprocessor(markdown.treeprocessors.Treeprocessor):
 
															             if node.tail and unicode(node.tail).strip():

														
 
															                 node.tail = MAGICLINKS_RE.sub(parse_link, unicode(node.tail))

														
 
															             for i in node:

														
 
															-                self.walk_tree(i)

														
 
															-

														
 
															-    def escape(self, html):

														
 
															-        html = html.replace('&', '&amp;')

														
 
															-        html = html.replace('<', '&lt;')

														
 
															-        html = html.replace('>', '&gt;')

														
 
															-        return html.replace('"', '&quot;')

														
 
															+                self.walk_tree(i)
														
--- a/misago/markdown/factory.py
+++ b/misago/markdown/factory.py
@@ -1,48 +1,14 @@
 
															 import re

														
 
															 import markdown

														
 
															-from HTMLParser import HTMLParser

														
 
															 from django.conf import settings

														
 
															 from django.utils.importlib import import_module

														
 
															 from django.utils.translation import ugettext_lazy as _

														
 
															 from misago.utils.strings import random_string

														
 
															-

														
 
															-class ClearHTMLParser(HTMLParser):

														
 
															-    def __init__(self):

														
 
															-        HTMLParser.__init__(self)

														
 
															-        self.clean_text = ''

														
 
															-        self.lookback = []

														
 
															-        

														
 
															-    def handle_entityref(self, name):

														
 
															-        if name == 'gt':

														
 
															-            self.clean_text += '>'

														
 
															-        if name == 'lt':

														
 
															-            self.clean_text += '<'

														
 
															-

														
 
															-    def handle_starttag(self, tag, attrs):

														
 
															-        self.lookback.append(tag)

														
 
															-

														
 
															-    def handle_endtag(self, tag):

														
 
															-        try:

														
 
															-            if self.lookback[-1] == tag:

														
 
															-                self.lookback.pop()

														
 
															-        except IndexError:

														
 
															-            pass

														
 
															-        

														
 
															-    def handle_data(self, data):

														
 
															-        # String does not repeat itself

														
 
															-        if self.clean_text[-len(data):] != data:

														
 
															-            # String is not "QUOTE"

														
 
															-            try:

														
 
															-                if self.lookback[-1] in ('strong', 'em'):

														
 
															-                    self.clean_text += data

														
 
															-                elif not (data == 'Quote' and self.lookback[-1] == 'h3' and self.lookback[-2] == 'blockquote'):

														
 
															-                    self.clean_text += data

														
 
															-            except IndexError:

														
 
															-                self.clean_text += data

														
 
															-

														
 
															+from misago.markdown.extensions.cleanlinks import CleanLinksExtension

														
 
															+from misago.markdown.parsers import RemoveHTMLParser

														
 
															 def clear_markdown(text):

														
 
															-    parser = ClearHTMLParser()

														
 
															+    parser = RemoveHTMLParser()

														
 
															     parser.feed(text)

														
 
															     return parser.clean_text

														
@@ -62,6 +28,8 @@ def signature_markdown(acl, text):
 
															                            extensions=['nl2br'])

														
 
															     remove_unsupported(md)

														
 
															+    cleanlinks = CleanLinksExtension()

														
 
															+    cleanlinks.extendMarkdown(md)

														
 
															     if not acl.usercp.allow_signature_links():

														
 
															         del md.inlinePatterns['link']

														
@@ -96,7 +64,8 @@ def post_markdown(request, text):
 
															         ext = attr()

														
 
															         ext.extendMarkdown(md)

														
 
															     text = md.convert(text)

														
 
															-    return tidy_markdown(md, text)

														
 
															+    md, text = tidy_markdown(md, text)

														
 
															+    return md, text

														
 
															 def tidy_markdown(md, text):

														
--- a/misago/markdown/parsers.py
+++ b/misago/markdown/parsers.py
@@ -0,0 +1,38 @@
 
															+from HTMLParser import HTMLParser

														
 
															+from urlparse import urlparse

														
 
															+from django.conf import settings

														
 
															+from misago.utils.strings import random_string

														
 
															+

														
 
															+class RemoveHTMLParser(HTMLParser):

														
 
															+    def __init__(self):

														
 
															+        HTMLParser.__init__(self)

														
 
															+        self.clean_text = ''

														
 
															+        self.lookback = []

														
 
															+        

														
 
															+    def handle_entityref(self, name):

														
 
															+        if name == 'gt':

														
 
															+            self.clean_text += '>'

														
 
															+        if name == 'lt':

														
 
															+            self.clean_text += '<'

														
 
															+

														
 
															+    def handle_starttag(self, tag, attrs):

														
 
															+        self.lookback.append(tag)

														
 
															+

														
 
															+    def handle_endtag(self, tag):

														
 
															+        try:

														
 
															+            if self.lookback[-1] == tag:

														
 
															+                self.lookback.pop()

														
 
															+        except IndexError:

														
 
															+            pass

														
 
															+        

														
 
															+    def handle_data(self, data):

														
 
															+        # String does not repeat itself

														
 
															+        if self.clean_text[-len(data):] != data:

														
 
															+            # String is not "QUOTE"

														
 
															+            try:

														
 
															+                if self.lookback[-1] in ('strong', 'em'):

														
 
															+                    self.clean_text += data

														
 
															+                elif not (data == 'Quote' and self.lookback[-1] == 'h3' and self.lookback[-2] == 'blockquote'):

														
 
															+                    self.clean_text += data

														
 
															+            except IndexError:

														
 
															+                self.clean_text += data
														
--- a/misago/settings_base.py
+++ b/misago/settings_base.py
@@ -144,6 +144,7 @@ MARKDOWN_EXTENSIONS = (
 
															     'misago.markdown.extensions.quotes.QuoteTitlesExtension',

														
 
															     'misago.markdown.extensions.mentions.MentionsExtension',

														
 
															     'misago.markdown.extensions.magiclinks.MagicLinksExtension',

														
 
															+    'misago.markdown.extensions.cleanlinks.CleanLinksExtension',

														
 
															 )

														
 
															 # Name of root urls configuration

														
--- a/misago/utils/strings.py
+++ b/misago/utils/strings.py
@@ -27,3 +27,9 @@ def short_string(string, length=16):
 
															     if len(bits[-1]) < 3:

														
 
															         bits.pop()

														
 
															     return '%s...' % (' '.join(bits))

														
 
															+

														
 
															+def html_escape(html):

														
 
															+    html = html.replace('&', '&amp;')

														
 
															+    html = html.replace('<', '&lt;')

														
 
															+    html = html.replace('>', '&gt;')

														
 
															+    return html.replace('"', '&quot;')
														
--- a/misago/utils/urls.py
+++ b/misago/utils/urls.py
@@ -0,0 +1,24 @@
 
															+#-*- coding: utf-8 -*-

														
 
															+import re

														
 
															+from urlparse import urlparse

														
 
															+from django.conf import settings

														
 
															+from misago.utils.strings import html_escape

														
 
															+

														
 
															+URL_RE = re.compile(r'^(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))$', re.UNICODE)

														
 
															+

														
 
															+def is_url(string):

														
 
															+    return URL_RE.search(string.strip()) != None

														
 
															+

														
 
															+

														
 
															+def is_inner(string):

														
 
															+    return urlparse(string.strip()).netloc.lower() == urlparse(settings.BOARD_ADDRESS.lower()).netloc

														
 
															+

														
 
															+

														
 
															+def clean_inner(string):

														
 
															+    parsed = urlparse(string.strip())

														
 
															+    href = parsed.path

														
 
															+    if parsed.query:

														
 
															+        href += '?%s' % parsed.query

														
 
															+    if parsed.fragment:

														
 
															+        href += '#%s' % parsed.fragment

														
 
															+    return html_escape(href)