Browse Source

Add cow_http_hd:parse_content_language/1

From RFC7231 and RFC5646.

The ABNF for language tags is terrible. It makes efficient parsing
a big challenge and the result is this huge ugly set of functions.
Thankfully triq allows us to make sure the implementation is correct.
A large number of examples have also been extracted from both RFCs.

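The various ?IS_ALPHA(C), ?IS_TOKEN(C) and so on have received a
change: they now use 'orelse' instead of ';'. This is because in
this new code we need to check more than one character per clause,
and ';' separates whole guard alternatives, so macros built on it
cannot be safely combined with ','.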

The compilation time for this module increased dramatically.
This apparently happens because the guards are too big. Using ranges
($a =< C, C =< $z) instead of the current solution makes compilation
much faster, but the function then runs twice as slowly, which is
not acceptable.
Loïc Hoguin 10 years ago
parent
commit
1f8a70d086
2 changed files with 382 additions and 20 deletions
  1. 53 17
      include/cow_inline.hrl
  2. 329 3
      src/cow_http_hd.erl

+ 53 - 17
include/cow_inline.hrl

@@ -18,27 +18,31 @@
 %% IS_ALPHA(Character)
 
 -define(IS_ALPHA(C),
-	C =:= $a; C =:= $b; C =:= $c; C =:= $d; C =:= $e;
-	C =:= $f; C =:= $g; C =:= $h; C =:= $i; C =:= $j;
-	C =:= $k; C =:= $l; C =:= $m; C =:= $n; C =:= $o;
-	C =:= $p; C =:= $q; C =:= $r; C =:= $s; C =:= $t;
-	C =:= $u; C =:= $v; C =:= $w; C =:= $x; C =:= $y;
-	C =:= $z;
-	C =:= $A; C =:= $B; C =:= $C; C =:= $D; C =:= $E;
-	C =:= $F; C =:= $G; C =:= $H; C =:= $I; C =:= $J;
-	C =:= $K; C =:= $L; C =:= $M; C =:= $N; C =:= $O;
-	C =:= $P; C =:= $Q; C =:= $R; C =:= $S; C =:= $T;
-	C =:= $U; C =:= $V; C =:= $W; C =:= $X; C =:= $Y;
+	C =:= $a orelse C =:= $b orelse C =:= $c orelse C =:= $d orelse C =:= $e orelse
+	C =:= $f orelse C =:= $g orelse C =:= $h orelse C =:= $i orelse C =:= $j orelse
+	C =:= $k orelse C =:= $l orelse C =:= $m orelse C =:= $n orelse C =:= $o orelse
+	C =:= $p orelse C =:= $q orelse C =:= $r orelse C =:= $s orelse C =:= $t orelse
+	C =:= $u orelse C =:= $v orelse C =:= $w orelse C =:= $x orelse C =:= $y orelse
+	C =:= $z orelse
+	C =:= $A orelse C =:= $B orelse C =:= $C orelse C =:= $D orelse C =:= $E orelse
+	C =:= $F orelse C =:= $G orelse C =:= $H orelse C =:= $I orelse C =:= $J orelse
+	C =:= $K orelse C =:= $L orelse C =:= $M orelse C =:= $N orelse C =:= $O orelse
+	C =:= $P orelse C =:= $Q orelse C =:= $R orelse C =:= $S orelse C =:= $T orelse
+	C =:= $U orelse C =:= $V orelse C =:= $W orelse C =:= $X orelse C =:= $Y orelse
 	C =:= $Z
 ).
 
 %% IS_DIGIT(Character)
 
 -define(IS_DIGIT(C),
-	C =:= $0; C =:= $1; C =:= $2; C =:= $3; C =:= $4;
-	C =:= $5; C =:= $6; C =:= $7; C =:= $8; C =:= $9
+	C =:= $0 orelse C =:= $1 orelse C =:= $2 orelse C =:= $3 orelse C =:= $4 orelse
+	C =:= $5 orelse C =:= $6 orelse C =:= $7 orelse C =:= $8 orelse C =:= $9
 ).
 
+%% IS_ALPHANUM(Character)
+
+-define(IS_ALPHANUM(C), ?IS_ALPHA(C) orelse ?IS_DIGIT(C)).
+
 %% IS_ETAGC(Character)
 
 -define(IS_ETAGC(C), C =:= 16#21; C >= 16#23, C =/= 16#7f).
@@ -46,16 +50,48 @@
 %% IS_TOKEN(Character)
 
 -define(IS_TOKEN(C),
-	?IS_ALPHA(C); ?IS_DIGIT(C);
-	C =:= $!; C =:= $#; C =:= $$; C =:= $%; C =:= $&;
-	C =:= $'; C =:= $*; C =:= $+; C =:= $-; C =:= $.;
-	C =:= $^; C =:= $_; C =:= $`; C =:= $|; C =:= $~
+	?IS_ALPHA(C) orelse ?IS_DIGIT(C)
+	orelse C =:= $! orelse C =:= $# orelse C =:= $$ orelse C =:= $% orelse C =:= $&
+	orelse C =:= $' orelse C =:= $* orelse C =:= $+ orelse C =:= $- orelse C =:= $.
+	orelse C =:= $^ orelse C =:= $_ orelse C =:= $` orelse C =:= $| orelse C =:= $~
 ).
 
 %% IS_VCHAR(Character)
 
 -define(IS_VCHAR(C), C =:= $\t; C > 31, C =/= 127).
 
+%% LC(Character)
+
+-define(LC(C), case C of
+	$A -> $a;
+	$B -> $b;
+	$C -> $c;
+	$D -> $d;
+	$E -> $e;
+	$F -> $f;
+	$G -> $g;
+	$H -> $h;
+	$I -> $i;
+	$J -> $j;
+	$K -> $k;
+	$L -> $l;
+	$M -> $m;
+	$N -> $n;
+	$O -> $o;
+	$P -> $p;
+	$Q -> $q;
+	$R -> $r;
+	$S -> $s;
+	$T -> $t;
+	$U -> $u;
+	$V -> $v;
+	$W -> $w;
+	$X -> $x;
+	$Y -> $y;
+	$Z -> $z;
+	_ -> C
+end).
+
 %% INLINE_LOWERCASE(Function, Rest, Acc, ...)
 %%
 %% To be included at the end of a case block.

+ 329 - 3
src/cow_http_hd.erl

@@ -20,6 +20,7 @@
 -export([parse_accept_language/1]).
 -export([parse_connection/1]).
 -export([parse_content_encoding/1]).
+-export([parse_content_language/1]).
 -export([parse_content_length/1]).
 -export([parse_content_type/1]).
 -export([parse_date/1]).
@@ -57,6 +58,7 @@ ows() ->
 
 alpha_chars() -> "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".
 alphanum_chars() -> "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".
+digit_chars() -> "0123456789".
 
 alpha() ->
 	oneof(alpha_chars()).
@@ -64,6 +66,9 @@ alpha() ->
 alphanum() ->
 	oneof(alphanum_chars()).
 
+digit() ->
+	oneof(digit_chars()).
+
 tchar() ->
 	frequency([
 		{1, oneof([$!, $#, $$, $%, $&, $', $*, $+, $-, $., $^, $_, $`, $|, $~])},
@@ -621,7 +626,7 @@ language_range_list_sep(<< $\t, R/bits >>, Acc) -> language_range_list_sep(R, Ac
 language_range_list_sep(<< $,, R/bits >>, Acc) -> language_range_list(R, Acc).
 
 -ifdef(TEST).
-language_tag() ->
+language_range_tag() ->
 	oneof([
 		[alpha()],
 		[alpha(), alpha()],
@@ -633,7 +638,7 @@ language_tag() ->
 		[alpha(), alpha(), alpha(), alpha(), alpha(), alpha(), alpha(), alpha()]
 	]).
 
-language_subtag() ->
+language_range_subtag() ->
 	[$-, oneof([
 		[alphanum()],
 		[alphanum(), alphanum()],
@@ -646,7 +651,7 @@ language_subtag() ->
 	])].
 
 language_range() ->
-	[language_tag(), list(language_subtag())].
+	[language_range_tag(), list(language_range_subtag())].
 
 accept_language() ->
 	?LET({R, W},
@@ -786,6 +791,327 @@ horse_parse_content_encoding() ->
 	).
 -endif.
 
+%% @doc Parse the Content-Language header.
+%%
+%% We do not support irregular deprecated tags that do not match the ABNF.
+
+-spec parse_content_language(binary()) -> [binary()].
+parse_content_language(ContentLanguage) ->
+	nonempty(langtag_list(ContentLanguage, [])).
+
+langtag_list(<<>>, Acc) -> lists:reverse(Acc);
+langtag_list(<< $\s, R/bits >>, Acc) -> langtag_list(R, Acc);
+langtag_list(<< $\t, R/bits >>, Acc) -> langtag_list(R, Acc);
+langtag_list(<< $,, R/bits >>, Acc) -> langtag_list(R, Acc);
+langtag_list(<< A, B, C, R/bits >>, Acc) when ?IS_ALPHA(A), ?IS_ALPHA(B), ?IS_ALPHA(C) ->
+	langtag_extlang(R, Acc, << ?LC(A), ?LC(B), ?LC(C) >>, 0);
+langtag_list(<< A, B, R/bits >>, Acc) when ?IS_ALPHA(A), ?IS_ALPHA(B) ->
+	langtag_extlang(R, Acc, << ?LC(A), ?LC(B) >>, 0);
+langtag_list(<< X, R/bits >>, Acc) when X =:= $x; X =:= $X -> langtag_privateuse_sub(R, Acc, << $x >>, 0).
+
+langtag_extlang(<<>>, Acc, T, _) -> lists:reverse([T|Acc]);
+langtag_extlang(<< $,, R/bits >>, Acc, T, _) -> langtag_list(R, [T|Acc]);
+langtag_extlang(<< $\s, R/bits >>, Acc, T, _) -> langtag_list_sep(R, [T|Acc]);
+langtag_extlang(<< $\t, R/bits >>, Acc, T, _) -> langtag_list_sep(R, [T|Acc]);
+langtag_extlang(<< $-, A, B, C, D, E, F, G, H, R/bits >>, Acc, T, _)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
+			?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G), ?IS_ALPHANUM(H) ->
+	langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G), ?LC(H) >>);
+langtag_extlang(<< $-, A, B, C, D, E, F, G, R/bits >>, Acc, T, _)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
+			?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G) ->
+	langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G) >>);
+langtag_extlang(<< $-, A, B, C, D, E, F, R/bits >>, Acc, T, _)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
+			?IS_ALPHANUM(E), ?IS_ALPHANUM(F) ->
+	langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F) >>);
+langtag_extlang(<< $-, A, B, C, D, E, R/bits >>, Acc, T, _)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), ?IS_ALPHANUM(E) ->
+	langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E) >>);
+langtag_extlang(<< $-, A, B, C, D, R/bits >>, Acc, T, _)
+		when ?IS_ALPHA(A), ?IS_ALPHA(B), ?IS_ALPHA(C), ?IS_ALPHA(D) ->
+	langtag_region(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D) >>);
+langtag_extlang(<< $-, A, B, C, R/bits >>, Acc, T, N)
+		when ?IS_ALPHA(A), ?IS_ALPHA(B), ?IS_ALPHA(C) ->
+	case N of
+		2 -> langtag_script(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C) >>);
+		_ -> langtag_extlang(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C) >>, N + 1)
+	end;
+langtag_extlang(R, Acc, T, _) -> langtag_region(R, Acc, T).
+
+langtag_script(<<>>, Acc, T) -> lists:reverse([T|Acc]);
+langtag_script(<< $,, R/bits >>, Acc, T) -> langtag_list(R, [T|Acc]);
+langtag_script(<< $\s, R/bits >>, Acc, T) -> langtag_list_sep(R, [T|Acc]);
+langtag_script(<< $\t, R/bits >>, Acc, T) -> langtag_list_sep(R, [T|Acc]);
+langtag_script(<< $-, A, B, C, D, E, F, G, H, R/bits >>, Acc, T)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
+			?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G), ?IS_ALPHANUM(H) ->
+	langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G), ?LC(H) >>);
+langtag_script(<< $-, A, B, C, D, E, F, G, R/bits >>, Acc, T)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
+			?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G) ->
+	langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G) >>);
+langtag_script(<< $-, A, B, C, D, E, F, R/bits >>, Acc, T)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
+			?IS_ALPHANUM(E), ?IS_ALPHANUM(F) ->
+	langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F) >>);
+langtag_script(<< $-, A, B, C, D, E, R/bits >>, Acc, T)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), ?IS_ALPHANUM(E) ->
+	langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E) >>);
+langtag_script(<< $-, A, B, C, D, R/bits >>, Acc, T)
+		when ?IS_ALPHA(A), ?IS_ALPHA(B), ?IS_ALPHA(C), ?IS_ALPHA(D) ->
+	langtag_region(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D) >>);
+langtag_script(R, Acc, T) ->
+	langtag_region(R, Acc, T).
+
+langtag_region(<<>>, Acc, T) -> lists:reverse([T|Acc]);
+langtag_region(<< $,, R/bits >>, Acc, T) -> langtag_list(R, [T|Acc]);
+langtag_region(<< $\s, R/bits >>, Acc, T) -> langtag_list_sep(R, [T|Acc]);
+langtag_region(<< $\t, R/bits >>, Acc, T) -> langtag_list_sep(R, [T|Acc]);
+langtag_region(<< $-, A, B, C, D, E, F, G, H, R/bits >>, Acc, T)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
+			?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G), ?IS_ALPHANUM(H) ->
+	langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G), ?LC(H) >>);
+langtag_region(<< $-, A, B, C, D, E, F, G, R/bits >>, Acc, T)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
+			?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G) ->
+	langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G) >>);
+langtag_region(<< $-, A, B, C, D, E, F, R/bits >>, Acc, T)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
+			?IS_ALPHANUM(E), ?IS_ALPHANUM(F) ->
+	langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F) >>);
+langtag_region(<< $-, A, B, C, D, E, R/bits >>, Acc, T)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), ?IS_ALPHANUM(E) ->
+	langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E) >>);
+langtag_region(<< $-, A, B, C, D, R/bits >>, Acc, T)
+		when ?IS_DIGIT(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D) ->
+	langtag_variant(R, Acc, << T/binary, $-, A, ?LC(B), ?LC(C), ?LC(D) >>);
+langtag_region(<< $-, A, B, R/bits >>, Acc, T) when ?IS_ALPHA(A), ?IS_ALPHA(B) ->
+	langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B) >>);
+langtag_region(<< $-, A, B, C, R/bits >>, Acc, T) when ?IS_DIGIT(A), ?IS_DIGIT(B), ?IS_DIGIT(C) ->
+	langtag_variant(R, Acc, << T/binary, $-, A, B, C >>);
+langtag_region(R, Acc, T) ->
+	langtag_variant(R, Acc, T).
+
+langtag_variant(<<>>, Acc, T) -> lists:reverse([T|Acc]);
+langtag_variant(<< $,, R/bits >>, Acc, T) -> langtag_list(R, [T|Acc]);
+langtag_variant(<< $\s, R/bits >>, Acc, T) -> langtag_list_sep(R, [T|Acc]);
+langtag_variant(<< $\t, R/bits >>, Acc, T) -> langtag_list_sep(R, [T|Acc]);
+langtag_variant(<< $-, A, B, C, D, E, F, G, H, R/bits >>, Acc, T)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
+			?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G), ?IS_ALPHANUM(H) ->
+	langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G), ?LC(H) >>);
+langtag_variant(<< $-, A, B, C, D, E, F, G, R/bits >>, Acc, T)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
+			?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G) ->
+	langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G) >>);
+langtag_variant(<< $-, A, B, C, D, E, F, R/bits >>, Acc, T)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
+			?IS_ALPHANUM(E), ?IS_ALPHANUM(F) ->
+	langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F) >>);
+langtag_variant(<< $-, A, B, C, D, E, R/bits >>, Acc, T)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), ?IS_ALPHANUM(E) ->
+	langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E) >>);
+langtag_variant(<< $-, A, B, C, D, R/bits >>, Acc, T)
+		when ?IS_DIGIT(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D) ->
+	langtag_variant(R, Acc, << T/binary, $-, A, ?LC(B), ?LC(C), ?LC(D) >>);
+langtag_variant(R, Acc, T) ->
+	langtag_extension(R, Acc, T).
+
+langtag_extension(<<>>, Acc, T) -> lists:reverse([T|Acc]);
+langtag_extension(<< $,, R/bits >>, Acc, T) -> langtag_list(R, [T|Acc]);
+langtag_extension(<< $\s, R/bits >>, Acc, T) -> langtag_list_sep(R, [T|Acc]);
+langtag_extension(<< $\t, R/bits >>, Acc, T) -> langtag_list_sep(R, [T|Acc]);
+langtag_extension(<< $-, X, R/bits >>, Acc, T) when X =:= $x; X =:= $X -> langtag_privateuse_sub(R, Acc, << T/binary, $-, $x >>, 0);
+langtag_extension(<< $-, S, R/bits >>, Acc, T) when ?IS_ALPHANUM(S) -> langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(S) >>, 0).
+
+langtag_extension_sub(<<>>, Acc, T, N) when N > 0 -> lists:reverse([T|Acc]);
+langtag_extension_sub(<< $,, R/bits >>, Acc, T, N) when N > 0 -> langtag_list(R, [T|Acc]);
+langtag_extension_sub(<< $\s, R/bits >>, Acc, T, N) when N > 0 -> langtag_list_sep(R, [T|Acc]);
+langtag_extension_sub(<< $\t, R/bits >>, Acc, T, N) when N > 0 -> langtag_list_sep(R, [T|Acc]);
+langtag_extension_sub(<< $-, A, B, C, D, E, F, G, H, R/bits >>, Acc, T, N)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
+			?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G), ?IS_ALPHANUM(H) ->
+	langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G), ?LC(H) >>, N + 1);
+langtag_extension_sub(<< $-, A, B, C, D, E, F, G, R/bits >>, Acc, T, N)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
+			?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G) ->
+	langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G) >>, N + 1);
+langtag_extension_sub(<< $-, A, B, C, D, E, F, R/bits >>, Acc, T, N)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
+			?IS_ALPHANUM(E), ?IS_ALPHANUM(F) ->
+	langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F) >>, N + 1);
+langtag_extension_sub(<< $-, A, B, C, D, E, R/bits >>, Acc, T, N)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), ?IS_ALPHANUM(E) ->
+	langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E) >>, N + 1);
+langtag_extension_sub(<< $-, A, B, C, D, R/bits >>, Acc, T, N)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D)  ->
+	langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D) >>, N + 1);
+langtag_extension_sub(<< $-, A, B, C, R/bits >>, Acc, T, N)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C)  ->
+	langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C) >>, N + 1);
+langtag_extension_sub(<< $-, A, B, R/bits >>, Acc, T, N)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B)  ->
+	langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B) >>, N + 1);
+langtag_extension_sub(R, Acc, T, N) when N > 0 ->
+	langtag_extension(R, Acc, T).
+
+langtag_privateuse_sub(<<>>, Acc, T, N) when N > 0 -> lists:reverse([T|Acc]);
+langtag_privateuse_sub(<< $,, R/bits >>, Acc, T, N) when N > 0 -> langtag_list(R, [T|Acc]);
+langtag_privateuse_sub(<< $\s, R/bits >>, Acc, T, N) when N > 0 -> langtag_list_sep(R, [T|Acc]);
+langtag_privateuse_sub(<< $\t, R/bits >>, Acc, T, N) when N > 0 -> langtag_list_sep(R, [T|Acc]);
+langtag_privateuse_sub(<< $-, A, B, C, D, E, F, G, H, R/bits >>, Acc, T, N)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
+			?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G), ?IS_ALPHANUM(H) ->
+	langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G), ?LC(H) >>, N + 1);
+langtag_privateuse_sub(<< $-, A, B, C, D, E, F, G, R/bits >>, Acc, T, N)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
+			?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G) ->
+	langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G) >>, N + 1);
+langtag_privateuse_sub(<< $-, A, B, C, D, E, F, R/bits >>, Acc, T, N)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
+			?IS_ALPHANUM(E), ?IS_ALPHANUM(F)  ->
+	langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F) >>, N + 1);
+langtag_privateuse_sub(<< $-, A, B, C, D, E, R/bits >>, Acc, T, N)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), ?IS_ALPHANUM(E) ->
+	langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E) >>, N + 1);
+langtag_privateuse_sub(<< $-, A, B, C, D, R/bits >>, Acc, T, N)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D) ->
+	langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D) >>, N + 1);
+langtag_privateuse_sub(<< $-, A, B, C, R/bits >>, Acc, T, N)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C) ->
+	langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C) >>, N + 1);
+langtag_privateuse_sub(<< $-, A, B, R/bits >>, Acc, T, N)
+		when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B) ->
+	langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B) >>, N + 1);
+langtag_privateuse_sub(<< $-, A, R/bits >>, Acc, T, N)
+		when ?IS_ALPHANUM(A) ->
+	langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A) >>, N + 1).
+
+langtag_list_sep(<<>>, Acc) -> lists:reverse(Acc);
+langtag_list_sep(<< $,, R/bits >>, Acc) -> langtag_list(R, Acc);
+langtag_list_sep(<< $\s, R/bits >>, Acc) -> langtag_list_sep(R, Acc);
+langtag_list_sep(<< $\t, R/bits >>, Acc) -> langtag_list_sep(R, Acc).
+
+-ifdef(TEST).
+vector(Min, Max, Dom) -> ?LET(N, choose(Min, Max), vector(N, Dom)).
+small_list(Dom) -> vector(0, 10, Dom).
+small_non_empty_list(Dom) -> vector(1, 10, Dom).
+
+langtag_language() -> vector(2, 3, alpha()).
+langtag_extlang() -> vector(0, 3, [$-, alpha(), alpha(), alpha()]).
+langtag_script() -> oneof([[], [$-, alpha(), alpha(), alpha(), alpha()]]).
+langtag_region() -> oneof([[], [$-, alpha(), alpha()], [$-, digit(), digit(), digit()]]).
+
+langtag_variant() ->
+	small_list(frequency([
+		{4, [$-, vector(5, 8, alphanum())]},
+		{1, [$-, digit(), alphanum(), alphanum(), alphanum()]}
+	])).
+
+langtag_extension() ->
+	small_list([$-, ?SUCHTHAT(S, alphanum(), S =/= $x andalso S =/= $X),
+		small_non_empty_list([$-, vector(2, 8, alphanum())])
+	]).
+
+langtag_privateuse() -> oneof([[], [$-, langtag_privateuse_nodash()]]).
+langtag_privateuse_nodash() -> [oneof([$x, $X]), small_non_empty_list([$-, vector(1, 8, alphanum())])].
+private_language_tag() -> ?LET(T, langtag_privateuse_nodash(), iolist_to_binary(T)).
+
+language_tag() ->
+	?LET(IoList,
+		[langtag_language(), langtag_extlang(), langtag_script(), langtag_region(),
+			langtag_variant(), langtag_extension(), langtag_privateuse()],
+		iolist_to_binary(IoList)).
+
+content_language() ->
+	?LET(L,
+		non_empty(list(frequency([
+			{90, language_tag()},
+			{10, private_language_tag()}
+		]))),
+		begin
+			<< _, ContentLanguage/binary >> = iolist_to_binary([[$,, T] || T <- L]),
+			{L, ContentLanguage}
+		end).
+
+prop_parse_content_language() ->
+	?FORALL({L, ContentLanguage},
+		content_language(),
+		begin
+			ResL = parse_content_language(ContentLanguage),
+			CheckedL = [?INLINE_LOWERCASE_BC(T) =:= ResT || {T, ResT} <- lists:zip(L, ResL)],
+			[true] =:= lists:usort(CheckedL)
+		end).
+
+parse_content_language_test_() ->
+	Tests = [
+		{<<"de">>, [<<"de">>]},
+		{<<"fr">>, [<<"fr">>]},
+		{<<"ja">>, [<<"ja">>]},
+		{<<"zh-Hant">>, [<<"zh-hant">>]},
+		{<<"zh-Hans">>, [<<"zh-hans">>]},
+		{<<"sr-Cyrl">>, [<<"sr-cyrl">>]},
+		{<<"sr-Latn">>, [<<"sr-latn">>]},
+		{<<"zh-cmn-Hans-CN">>, [<<"zh-cmn-hans-cn">>]},
+		{<<"cmn-Hans-CN">>, [<<"cmn-hans-cn">>]},
+		{<<"zh-yue-HK">>, [<<"zh-yue-hk">>]},
+		{<<"yue-HK">>, [<<"yue-hk">>]},
+		{<<"zh-Hans-CN">>, [<<"zh-hans-cn">>]},
+		{<<"sr-Latn-RS">>, [<<"sr-latn-rs">>]},
+		{<<"sl-rozaj">>, [<<"sl-rozaj">>]},
+		{<<"sl-rozaj-biske">>, [<<"sl-rozaj-biske">>]},
+		{<<"sl-nedis">>, [<<"sl-nedis">>]},
+		{<<"de-CH-1901">>, [<<"de-ch-1901">>]},
+		{<<"sl-IT-nedis">>, [<<"sl-it-nedis">>]},
+		{<<"hy-Latn-IT-arevela">>, [<<"hy-latn-it-arevela">>]},
+		{<<"de-DE">>, [<<"de-de">>]},
+		{<<"en-US">>, [<<"en-us">>]},
+		{<<"es-419">>, [<<"es-419">>]},
+		{<<"de-CH-x-phonebk">>, [<<"de-ch-x-phonebk">>]},
+		{<<"az-Arab-x-AZE-derbend">>, [<<"az-arab-x-aze-derbend">>]},
+		{<<"x-whatever">>, [<<"x-whatever">>]},
+		{<<"qaa-Qaaa-QM-x-southern">>, [<<"qaa-qaaa-qm-x-southern">>]},
+		{<<"de-Qaaa">>, [<<"de-qaaa">>]},
+		{<<"sr-Latn-QM">>, [<<"sr-latn-qm">>]},
+		{<<"sr-Qaaa-RS">>, [<<"sr-qaaa-rs">>]},
+		{<<"en-US-u-islamcal">>, [<<"en-us-u-islamcal">>]},
+		{<<"zh-CN-a-myext-x-private">>, [<<"zh-cn-a-myext-x-private">>]},
+		{<<"en-a-myext-b-another">>, [<<"en-a-myext-b-another">>]},
+		{<<"mn-Cyrl-MN">>, [<<"mn-cyrl-mn">>]},
+		{<<"MN-cYRL-mn">>, [<<"mn-cyrl-mn">>]},
+		{<<"mN-cYrL-Mn">>, [<<"mn-cyrl-mn">>]},
+		{<<"az-Arab-IR">>, [<<"az-arab-ir">>]},
+		{<<"zh-gan">>, [<<"zh-gan">>]},
+		{<<"zh-yue">>, [<<"zh-yue">>]},
+		{<<"zh-cmn">>, [<<"zh-cmn">>]},
+		{<<"de-AT">>, [<<"de-at">>]},
+		{<<"de-CH-1996">>, [<<"de-ch-1996">>]},
+		{<<"en-Latn-GB-boont-r-extended-sequence-x-private">>,
+			[<<"en-latn-gb-boont-r-extended-sequence-x-private">>]},
+		{<<"el-x-koine">>, [<<"el-x-koine">>]},
+		{<<"el-x-attic">>, [<<"el-x-attic">>]},
+		{<<"fr, en-US, es-419, az-Arab, x-pig-latin, man-Nkoo-GN">>,
+			[<<"fr">>, <<"en-us">>, <<"es-419">>, <<"az-arab">>, <<"x-pig-latin">>, <<"man-nkoo-gn">>]},
+		{<<"da">>, [<<"da">>]},
+		{<<"mi, en">>, [<<"mi">>, <<"en">>]}
+	],
+	[{V, fun() -> R = parse_content_language(V) end} || {V, R} <- Tests].
+
+parse_content_language_error_test_() ->
+	Tests = [
+		<<>>
+	],
+	[{V, fun() -> {'EXIT', _} = (catch parse_content_language(V)) end} || V <- Tests].
+-endif.
+
+-ifdef(PERF).
+horse_parse_content_language() ->
+	horse:repeat(100000,
+		parse_content_language(<<"fr, en-US, es-419, az-Arab, x-pig-latin, man-Nkoo-GN">>)
+	).
+-endif.
+
 %% @doc Parse the Content-Length header.
 %%
 %% The value has at least one digit, and may be followed by whitespace.