|
@@ -0,0 +1,131 @@
|
|
|
+# -*- coding: utf-8 -*-
|
|
|
+"""
|
|
|
+ flaskbb.utils.http
|
|
|
+ ~~~~~~~~~~~~~~~~~~
|
|
|
+
|
|
|
+ Provides a utility function that attempts to validate a URL against
|
|
|
+ a set of valid hosts.
|
|
|
+
|
|
|
+ See https://www.owasp.org/index.php/Unvalidated_Redirects_and_Forwards_Cheat_Sheet
|
|
|
+ for more information about this topic.
|
|
|
+
|
|
|
+ Note: Most of this code has been taken from Django 3.2.0.alpha0.
|
|
|
+"""
|
|
|
+import unicodedata
|
|
|
+from urllib.parse import (
|
|
|
+ ParseResult,
|
|
|
+ SplitResult,
|
|
|
+ _coerce_args,
|
|
|
+ _splitnetloc,
|
|
|
+ _splitparams,
|
|
|
+ scheme_chars,
|
|
|
+ uses_params,
|
|
|
+)
|
|
|
+
|
|
|
+# Copied from urllib.parse.urlparse() but uses fixed urlsplit() function.
|
|
|
def _urlparse(url, scheme="", allow_fragments=True):
    """Split *url* into the six components of a general URL.

    The general form is
    ``<scheme>://<netloc>/<path>;<params>?<query>#<fragment>``. The netloc
    is returned as one string and ``%`` escapes are left untouched.

    This follows ``urllib.parse.urlparse()`` but delegates the initial
    split to this module's ``_urlsplit()``.

    :param url: The URL to parse.
    :param scheme: Fallback scheme used when *url* does not carry one.
    :param allow_fragments: Passed through to ``_urlsplit()``.
    :returns: A ``ParseResult`` 6-tuple
              ``(scheme, netloc, path, params, query, fragment)``, passed
              through the coercion callback from ``_coerce_args()``.
    """
    url, scheme, finalize = _coerce_args(url, scheme)
    scheme, netloc, path, query, fragment = _urlsplit(
        url, scheme, allow_fragments
    )

    # Parameters are only split off for schemes that are known to use them.
    params = ""
    if ";" in path and scheme in uses_params:
        path, params = _splitparams(path)

    return finalize(
        ParseResult(scheme, netloc, path, params, query, fragment)
    )
|
|
|
+
|
|
|
+
|
|
|
+# Copied from urllib.parse.urlsplit() with
|
|
|
+# https://github.com/python/cpython/pull/661 applied.
|
|
|
+# This fix has been backported to Python 3.8.
|
|
|
+# TODO: Remove this once we drop support for Python < 3.8
|
|
|
def _urlsplit(url, scheme="", allow_fragments=True):
    """Split *url* into the five components of a general URL.

    The general form is ``<scheme>://<netloc>/<path>?<query>#<fragment>``.
    The netloc is returned as one string and ``%`` escapes are left
    untouched.

    ASCII tab, carriage return and line feed characters are removed from
    *url* and *scheme* before parsing. Browsers drop these characters, so
    leaving them in would let ``/\\t/example.com`` be parsed as a relative
    path although a browser treats it as ``//example.com``.

    :param url: The URL to split.
    :param scheme: Fallback scheme used when *url* does not carry one.
    :param allow_fragments: If false, ``#`` is not treated as the start of
                            a fragment and stays in the preceding
                            component.
    :returns: A ``SplitResult`` 5-tuple
              ``(scheme, netloc, path, query, fragment)``, passed through
              the coercion callback from ``_coerce_args()``.
    :raises ValueError: If the netloc contains an unbalanced ``[`` or
                        ``]`` (invalid IPv6 literal).
    """
    url, scheme, _coerce_result = _coerce_args(url, scheme)

    # Mirror the hardening in current urllib.parse.urlsplit(): strip the
    # characters that browsers silently remove from URLs.
    for unsafe in ("\t", "\r", "\n"):
        url = url.replace(unsafe, "")
        scheme = scheme.replace(unsafe, "")

    netloc = query = fragment = ""

    # Everything before the first ":" is a scheme only if it consists
    # solely of valid scheme characters.
    colon = url.find(":")
    if colon > 0 and all(c in scheme_chars for c in url[:colon]):
        scheme, url = url[:colon].lower(), url[colon + 1:]

    if url[:2] == "//":
        netloc, url = _splitnetloc(url, 2)
        has_open = "[" in netloc
        has_close = "]" in netloc
        if has_open != has_close:
            raise ValueError("Invalid IPv6 URL")
    if allow_fragments and "#" in url:
        url, fragment = url.split("#", 1)
    if "?" in url:
        url, query = url.split("?", 1)
    return _coerce_result(SplitResult(scheme, netloc, url, query, fragment))
|
|
|
+
|
|
|
+
|
|
|
def _url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False):
    """Check a single URL string against the allowed hosts and schemes.

    :param url: The URL to check. An empty value is always rejected.
    :param allowed_hosts: Container of host names the URL may point to.
    :param require_https: If true, only ``https`` is accepted as a scheme;
                          otherwise ``http`` and ``https`` are accepted.
    :returns: ``True`` if the URL has no host or an allowed host, and has
              no scheme or an accepted scheme; ``False`` otherwise.
    """
    # An empty URL has no first character to inspect below and is never a
    # valid redirect target, so reject it up front instead of raising.
    if not url:
        return False
    # Chrome considers any URL with more than two slashes to be absolute, but
    # urlparse is not so flexible. Treat any url with three slashes as unsafe.
    if url.startswith("///"):
        return False
    try:
        url_info = _urlparse(url)
    except ValueError:  # e.g. invalid IPv6 addresses
        return False
    # Forbid URLs like http:///example.com - with a scheme, but without a
    # hostname. In that URL, example.com is not the hostname but a path
    # component. However, Chrome will still consider example.com to be the
    # hostname, so we must not allow this syntax.
    if not url_info.netloc and url_info.scheme:
        return False
    # Forbid URLs that start with control characters. Some browsers (like
    # Chrome) ignore quite a few control characters at the start of a
    # URL and might consider the URL as scheme relative.
    if unicodedata.category(url[0])[0] == "C":
        return False
    scheme = url_info.scheme
    # Consider URLs without a scheme (e.g. //example.com/p) to be http.
    if not url_info.scheme and url_info.netloc:
        scheme = "http"
    valid_schemes = ["https"] if require_https else ["http", "https"]
    return (not url_info.netloc or url_info.netloc in allowed_hosts) and (
        not scheme or scheme in valid_schemes
    )
|
|
|
+
|
|
|
+
|
|
|
def is_safe_url(url, allowed_hosts, require_https=False):
    """Tell whether *url* points to an allowed host over a safe scheme.

    An empty URL (``None``, ``""`` or whitespace only) is never safe.

    Note that a ``True`` result does not mean the URL is well-formed; it
    may still be quoted incorrectly, so the path component of untrusted
    URLs should additionally be encoded before use.

    :param url: The URL to check. Surrounding whitespace is ignored.
    :param allowed_hosts: A host name, a container of host names, or
                          ``None`` for "no external host is allowed".
    :param require_https: If true, only ``https`` counts as a valid
                          scheme; by default both ``http`` and ``https``
                          are accepted.
    :returns: ``True`` if the URL passes the host and scheme checks,
              ``False`` otherwise.
    """
    candidate = url if url is None else url.strip()
    if not candidate:
        return False

    if allowed_hosts is None:
        hosts = set()
    elif isinstance(allowed_hosts, str):
        hosts = {allowed_hosts}
    else:
        hosts = allowed_hosts

    # Chrome treats \ completely as / in paths, but a backslash could also
    # be part of basic auth credentials, so both spellings must pass.
    if not _url_has_allowed_host_and_scheme(
        candidate, hosts, require_https=require_https
    ):
        return False
    normalized = candidate.replace("\\", "/")
    return _url_has_allowed_host_and_scheme(
        normalized, hosts, require_https=require_https
    )
|