4 years ago · 6dc2e122ce
--- a/flaskbb/configs/default.py
+++ b/flaskbb/configs/default.py
@@ -47,6 +47,12 @@ class DefaultConfig(object):
 
				     # This only affects the url generation with 'url_for'.
			
 
				     PREFERRED_URL_SCHEME = "http"
			
 
				 
			
 
				+    # Allowed Hosts
			
 
				+    # A List of allowed hosts who will be considered "safe" when redirecting
			
 
				+    # urls. If set to None only the host currently serving this website
			
 
				+    # will be considered "safe".
			
 
				+    ALLOWED_HOSTS = None
			
 
				+
			
 
				     # Logging Settings
			
 
				     # ------------------------------
			
 
				     # This config section will deal with the logging settings
			
--- a/flaskbb/utils/helpers.py
+++ b/flaskbb/utils/helpers.py
@@ -19,7 +19,6 @@ import warnings
 
				 from datetime import datetime, timedelta
			
 
				 from email import message_from_string
			
 
				 from functools import wraps
			
 
				-from urllib.parse import urlparse, urljoin
			
 
				 
			
 
				 import pkg_resources
			
 
				 import requests
			
@@ -28,7 +27,7 @@ from babel.core import get_locale_identifier
 
				 from babel.dates import format_date as babel_format_date
			
 
				 from babel.dates import format_datetime as babel_format_datetime
			
 
				 from babel.dates import format_timedelta as babel_format_timedelta
			
 
				-from flask import flash, g, redirect, request, session, url_for
			
 
				+from flask import current_app, flash, g, redirect, request, session, url_for
			
 
				 from flask_allows import Permission
			
 
				 from flask_babelplus import lazy_gettext as _
			
 
				 from flask_login import current_user
			
@@ -40,6 +39,7 @@ from werkzeug.local import LocalProxy
 
				 from werkzeug.utils import ImportStringError, import_string
			
 
				 
			
 
				 from flaskbb.extensions import babel, redis_store
			
 
				+from flaskbb.utils.http import is_safe_url
			
 
				 from flaskbb.utils.settings import flaskbb_config
			
 
				 
			
 
				 
			
@@ -77,27 +77,14 @@ def slugify(text, delim=u"-"):
 
				     return str(delim.join(result))
			
 
				 
			
 
				 
			
 
				-def is_safe_url(target):
			
 
				-    """Check if target will lead to the same server
			
 
				-    Ref: https://web.archive.org/web/20190128010142/http://flask.pocoo.org/snippets/62/
			
 
				-
			
 
				-    :param target: The redirect target
			
 
				-    """
			
 
				-    ref_url = urlparse(request.host_url)
			
 
				-    test_url = urlparse(urljoin(request.host_url, target))
			
 
				-    return (
			
 
				-        test_url.scheme in ("http", "https")
			
 
				-        and ref_url.netloc == test_url.netloc
			
 
				-    )
			
 
				-
			
 
				-
			
 
				 def redirect_url(endpoint, use_referrer=True):
			
 
				     """Generates a redirect url based on the referrer or endpoint."""
			
 
				     targets = [endpoint]
			
 
				+    allowed_hosts = current_app.config["ALLOWED_HOSTS"]
			
 
				     if use_referrer:
			
 
				         targets.insert(0, request.referrer)
			
 
				     for target in targets:
			
 
				-        if target and is_safe_url(target):
			
 
				+        if target and is_safe_url(target, allowed_hosts):
			
 
				             return target
			
 
				 
			
 
				 
			
--- a/flaskbb/utils/http.py
+++ b/flaskbb/utils/http.py
@@ -0,0 +1,131 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+    flaskbb.utils.http
			
 
				+    ~~~~~~~~~~~~~~~~~~
			
 
				+
			
 
				+    Provides a utility function that attempts to validate an URL against
			
 
				+    a set of valid hosts.
			
 
				+
			
 
				+    See https://www.owasp.org/index.php/Unvalidated_Redirects_and_Forwards_Cheat_Sheet
			
 
				+    for more information about this topic.
			
 
				+
			
 
				+    Note: Most of this code has been taken from Django 3.2.0.alpha0.
			
 
				+"""
			
 
				+import unicodedata
			
 
				+from urllib.parse import (
			
 
				+    ParseResult,
			
 
				+    SplitResult,
			
 
				+    _coerce_args,
			
 
				+    _splitnetloc,
			
 
				+    _splitparams,
			
 
				+    scheme_chars,
			
 
				+    uses_params,
			
 
				+)
			
 
				+
			
 
				+# Copied from urllib.parse.urlparse() but uses fixed urlsplit() function.
			
 
				+def _urlparse(url, scheme="", allow_fragments=True):
			
 
				+    """Parse a URL into 6 components:
			
 
				+    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
			
 
				+    Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
			
 
				+    Note that we don't break the components up in smaller bits
			
 
				+    (e.g. netloc is a single string) and we don't expand % escapes."""
			
 
				+    url, scheme, _coerce_result = _coerce_args(url, scheme)
			
 
				+    splitresult = _urlsplit(url, scheme, allow_fragments)
			
 
				+    scheme, netloc, url, query, fragment = splitresult
			
 
				+    if scheme in uses_params and ";" in url:
			
 
				+        url, params = _splitparams(url)
			
 
				+    else:
			
 
				+        params = ""
			
 
				+    result = ParseResult(scheme, netloc, url, params, query, fragment)
			
 
				+    return _coerce_result(result)
			
 
				+
			
 
				+
			
 
				+# Copied from urllib.parse.urlsplit() with
			
 
				+# https://github.com/python/cpython/pull/661 applied.
			
 
				+# This fix has been backported to Python 3.8.
			
 
				+# TODO: Remove this once we drop support for Python < 3.8
			
 
				+def _urlsplit(url, scheme="", allow_fragments=True):
			
 
				+    """Parse a URL into 5 components:
			
 
				+    <scheme>://<netloc>/<path>?<query>#<fragment>
			
 
				+    Return a 5-tuple: (scheme, netloc, path, query, fragment).
			
 
				+    Note that we don't break the components up in smaller bits
			
 
				+    (e.g. netloc is a single string) and we don't expand % escapes."""
			
 
				+    url, scheme, _coerce_result = _coerce_args(url, scheme)
			
 
				+    netloc = query = fragment = ""
			
 
				+    i = url.find(":")
			
 
				+    if i > 0:
			
 
				+        for c in url[:i]:
			
 
				+            if c not in scheme_chars:
			
 
				+                break
			
 
				+        else:
			
 
				+            scheme, url = url[:i].lower(), url[i + 1 :]
			
 
				+
			
 
				+    if url[:2] == "//":
			
 
				+        netloc, url = _splitnetloc(url, 2)
			
 
				+        if ("[" in netloc and "]" not in netloc) or (
			
 
				+            "]" in netloc and "[" not in netloc
			
 
				+        ):
			
 
				+            raise ValueError("Invalid IPv6 URL")
			
 
				+    if allow_fragments and "#" in url:
			
 
				+        url, fragment = url.split("#", 1)
			
 
				+    if "?" in url:
			
 
				+        url, query = url.split("?", 1)
			
 
				+    v = SplitResult(scheme, netloc, url, query, fragment)
			
 
				+    return _coerce_result(v)
			
 
				+
			
 
				+
			
 
				+def _url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False):
			
 
				+    # Chrome considers any URL with more than two slashes to be absolute, but
			
 
				+    # urlparse is not so flexible. Treat any url with three slashes as unsafe.
			
 
				+    if url.startswith("///"):
			
 
				+        return False
			
 
				+    try:
			
 
				+        url_info = _urlparse(url)
			
 
				+    except ValueError:  # e.g. invalid IPv6 addresses
			
 
				+        return False
			
 
				+    # Forbid URLs like http:///example.com - with a scheme, but without a hostname.
			
 
				+    # In that URL, example.com is not the hostname but, a path component. However,
			
 
				+    # Chrome will still consider example.com to be the hostname, so we must not
			
 
				+    # allow this syntax.
			
 
				+    if not url_info.netloc and url_info.scheme:
			
 
				+        return False
			
 
				+    # Forbid URLs that start with control characters. Some browsers (like
			
 
				+    # Chrome) ignore quite a few control characters at the start of a
			
 
				+    # URL and might consider the URL as scheme relative.
			
 
				+    if unicodedata.category(url[0])[0] == "C":
			
 
				+        return False
			
 
				+    scheme = url_info.scheme
			
 
				+    # Consider URLs without a scheme (e.g. //example.com/p) to be http.
			
 
				+    if not url_info.scheme and url_info.netloc:
			
 
				+        scheme = "http"
			
 
				+    valid_schemes = ["https"] if require_https else ["http", "https"]
			
 
				+    return (not url_info.netloc or url_info.netloc in allowed_hosts) and (
			
 
				+        not scheme or scheme in valid_schemes
			
 
				+    )
			
 
				+
			
 
				+
			
 
				+def is_safe_url(url, allowed_hosts, require_https=False):
			
 
				+    """
			
 
				+    Return ``True`` if the url uses an allowed host and a safe scheme.
			
 
				+    Always return ``False`` on an empty url.
			
 
				+    If ``require_https`` is ``True``, only 'https' will be considered a valid
			
 
				+    scheme, as opposed to 'http' and 'https' with the default, ``False``.
			
 
				+    Note: "True" doesn't entail that a URL is "safe". It may still be e.g.
			
 
				+    quoted incorrectly. Ensure to also use django.utils.encoding.iri_to_uri()
			
 
				+    on the path component of untrusted URLs.
			
 
				+    """
			
 
				+    if url is not None:
			
 
				+        url = url.strip()
			
 
				+    if not url:
			
 
				+        return False
			
 
				+    if allowed_hosts is None:
			
 
				+        allowed_hosts = set()
			
 
				+    elif isinstance(allowed_hosts, str):
			
 
				+        allowed_hosts = {allowed_hosts}
			
 
				+    # Chrome treats \ completely as / in paths but it could be part of some
			
 
				+    # basic auth credentials so we need to check both URLs.
			
 
				+    return _url_has_allowed_host_and_scheme(
			
 
				+        url, allowed_hosts, require_https=require_https
			
 
				+    ) and _url_has_allowed_host_and_scheme(
			
 
				+        url.replace("\\", "/"), allowed_hosts, require_https=require_https
			
 
				+    )
			
--- a/tests/unit/utils/test_http.py
+++ b/tests/unit/utils/test_http.py
@@ -0,0 +1,95 @@
 
				+# taken from Django 3.2.0.alpha0 and adapted to run in pytest
			
 
				+from flaskbb.utils.http import is_safe_url
			
 
				+
			
 
				+
			
 
				+def test_bad_urls():
			
 
				+    bad_urls = (
			
 
				+        "http://example.com",
			
 
				+        "http:///example.com",
			
 
				+        "https://example.com",
			
 
				+        "ftp://example.com",
			
 
				+        r"\\example.com",
			
 
				+        r"\\\example.com",
			
 
				+        r"/\\/example.com",
			
 
				+        r"\\\example.com",
			
 
				+        r"\\example.com",
			
 
				+        r"\\//example.com",
			
 
				+        r"/\/example.com",
			
 
				+        r"\/example.com",
			
 
				+        r"/\example.com",
			
 
				+        "http:///example.com",
			
 
				+        r"http:/\//example.com",
			
 
				+        r"http:\/example.com",
			
 
				+        r"http:/\example.com",
			
 
				+        'javascript:alert("XSS")',
			
 
				+        "\njavascript:alert(x)",
			
 
				+        "\x08//example.com",
			
 
				+        r"http://otherserver\@example.com",
			
 
				+        r"http:\\testserver\@example.com",
			
 
				+        r"http://testserver\me:pass@example.com",
			
 
				+        r"http://testserver\@example.com",
			
 
				+        r"http:\\testserver\confirm\me@example.com",
			
 
				+        "http:999999999",
			
 
				+        "ftp:9999999999",
			
 
				+        "\n",
			
 
				+        "http://[2001:cdba:0000:0000:0000:0000:3257:9652/",
			
 
				+        "http://2001:cdba:0000:0000:0000:0000:3257:9652]/",
			
 
				+    )
			
 
				+    for bad_url in bad_urls:
			
 
				+        assert not is_safe_url(bad_url, allowed_hosts={"testserver", "testserver2"})
			
 
				+
			
 
				+
			
 
				+def test_good_urls():
			
 
				+    good_urls = (
			
 
				+        "/view/?param=http://example.com",
			
 
				+        "/view/?param=https://example.com",
			
 
				+        "/view?param=ftp://example.com",
			
 
				+        "view/?param=//example.com",
			
 
				+        "https://testserver/",
			
 
				+        "HTTPS://testserver/",
			
 
				+        "//testserver/",
			
 
				+        "http://testserver/confirm?email=me@example.com",
			
 
				+        "/url%20with%20spaces/",
			
 
				+        "path/http:2222222222",
			
 
				+    )
			
 
				+    for good_url in good_urls:
			
 
				+        assert is_safe_url(good_url, allowed_hosts={"otherserver", "testserver"})
			
 
				+
			
 
				+
			
 
				+def test_basic_auth():
			
 
				+    # Valid basic auth credentials are allowed.
			
 
				+    assert is_safe_url(
			
 
				+        r"http://user:pass@testserver/", allowed_hosts={"user:pass@testserver"}
			
 
				+    )
			
 
				+
			
 
				+
			
 
				+def test_no_allowed_hosts():
			
 
				+    # A path without host is allowed.
			
 
				+    assert is_safe_url("/confirm/me@example.com", allowed_hosts=None)
			
 
				+    # Basic auth without host is not allowed.
			
 
				+    assert not is_safe_url(r"http://testserver\@example.com", allowed_hosts=None)
			
 
				+
			
 
				+
			
 
				+def test_allowed_hosts_str():
			
 
				+    assert is_safe_url("http://good.com/good", allowed_hosts="good.com")
			
 
				+    assert not is_safe_url("http://good.co/evil", allowed_hosts="good.com")
			
 
				+
			
 
				+
			
 
				+def test_secure_param_https_urls():
			
 
				+    secure_urls = (
			
 
				+        "https://example.com/p",
			
 
				+        "HTTPS://example.com/p",
			
 
				+        "/view/?param=http://example.com",
			
 
				+    )
			
 
				+    for url in secure_urls:
			
 
				+        assert is_safe_url(url, allowed_hosts={"example.com"}, require_https=True)
			
 
				+
			
 
				+
			
 
				+def test_secure_param_non_https_urls():
			
 
				+    insecure_urls = (
			
 
				+        "http://example.com/p",
			
 
				+        "ftp://example.com/p",
			
 
				+        "//example.com/p",
			
 
				+    )
			
 
				+    for url in insecure_urls:
			
 
				+        assert not is_safe_url(url, allowed_hosts={"example.com"}, require_https=True)