cowboy_http.erl 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949
  1. %% Copyright (c) 2011, Loïc Hoguin <essen@dev-extend.eu>
  2. %% Copyright (c) 2011, Anthony Ramine <nox@dev-extend.eu>
  3. %%
  4. %% Permission to use, copy, modify, and/or distribute this software for any
  5. %% purpose with or without fee is hereby granted, provided that the above
  6. %% copyright notice and this permission notice appear in all copies.
  7. %%
  8. %% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9. %% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  10. %% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  11. %% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  12. %% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  13. %% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  14. %% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  15. %% @doc Core HTTP parsing API.
  16. -module(cowboy_http).
  17. %% Parsing.
  18. -export([list/2, nonempty_list/2, content_type/1,
  19. media_range/2, conneg/2, language_range/2, entity_tag_match/1,
  20. http_date/1, rfc1123_date/1, rfc850_date/1, asctime_date/1,
  21. digits/1, token/2, token_ci/2, quoted_string/2]).
  22. %% Interpretation.
  23. -export([connection_to_atom/1, urldecode/1, urldecode/2, urlencode/1,
  24. urlencode/2]).
  25. -include("include/http.hrl").
  26. -include_lib("eunit/include/eunit.hrl").
  27. %% Parsing.
  28. %% @doc Parse a non-empty list of the given type.
  29. -spec nonempty_list(binary(), fun()) -> [any(), ...] | {error, badarg}.
  30. nonempty_list(Data, Fun) ->
  31. case list(Data, Fun, []) of
  32. {error, badarg} -> {error, badarg};
  33. [] -> {error, badarg};
  34. L -> lists:reverse(L)
  35. end.
  36. %% @doc Parse a list of the given type.
  37. -spec list(binary(), fun()) -> list() | {error, badarg}.
  38. list(Data, Fun) ->
  39. case list(Data, Fun, []) of
  40. {error, badarg} -> {error, badarg};
  41. L -> lists:reverse(L)
  42. end.
  43. -spec list(binary(), fun(), [binary()]) -> [any()] | {error, badarg}.
  44. %% From the RFC:
  45. %% <blockquote>Wherever this construct is used, null elements are allowed,
  46. %% but do not contribute to the count of elements present.
  47. %% That is, "(element), , (element) " is permitted, but counts
  48. %% as only two elements. Therefore, where at least one element is required,
  49. %% at least one non-null element MUST be present.</blockquote>
  50. list(Data, Fun, Acc) ->
  51. whitespace(Data,
  52. fun (<<>>) -> Acc;
  53. (<< $,, Rest/bits >>) -> list(Rest, Fun, Acc);
  54. (Rest) -> Fun(Rest,
  55. fun (D, I) -> whitespace(D,
  56. fun (<<>>) -> [I|Acc];
  57. (<< $,, R/bits >>) -> list(R, Fun, [I|Acc]);
  58. (_Any) -> {error, badarg}
  59. end)
  60. end)
  61. end).
  62. %% @doc Parse a content type.
  63. -spec content_type(binary()) -> any().
  64. content_type(Data) ->
  65. media_type(Data,
  66. fun (Rest, Type, SubType) ->
  67. content_type_params(Rest,
  68. fun (Params) -> {Type, SubType, Params} end, [])
  69. end).
  70. -spec content_type_params(binary(), fun(), list({binary(), binary()}))
  71. -> any().
  72. content_type_params(Data, Fun, Acc) ->
  73. whitespace(Data,
  74. fun (<< $;, Rest/bits >>) -> content_type_param(Rest, Fun, Acc);
  75. (<<>>) -> Fun(lists:reverse(Acc));
  76. (_Rest) -> {error, badarg}
  77. end).
  78. -spec content_type_param(binary(), fun(), list({binary(), binary()}))
  79. -> any().
  80. content_type_param(Data, Fun, Acc) ->
  81. whitespace(Data,
  82. fun (Rest) ->
  83. token_ci(Rest,
  84. fun (_Rest2, <<>>) -> {error, badarg};
  85. (<< $=, Rest2/bits >>, Attr) ->
  86. word(Rest2,
  87. fun (Rest3, Value) ->
  88. content_type_params(Rest3, Fun,
  89. [{Attr, Value}|Acc])
  90. end);
  91. (_Rest2, _Attr) -> {error, badarg}
  92. end)
  93. end).
  94. %% @doc Parse a media range.
  95. -spec media_range(binary(), fun()) -> any().
  96. media_range(Data, Fun) ->
  97. media_type(Data,
  98. fun (Rest, Type, SubType) ->
  99. media_range_params(Rest, Fun, Type, SubType, [])
  100. end).
  101. -spec media_range_params(binary(), fun(), binary(), binary(),
  102. [{binary(), binary()}]) -> any().
  103. media_range_params(Data, Fun, Type, SubType, Acc) ->
  104. whitespace(Data,
  105. fun (<< $;, Rest/bits >>) ->
  106. whitespace(Rest,
  107. fun (Rest2) ->
  108. media_range_param_attr(Rest2, Fun, Type, SubType, Acc)
  109. end);
  110. (Rest) -> Fun(Rest, {{Type, SubType, lists:reverse(Acc)}, 1000, []})
  111. end).
  112. -spec media_range_param_attr(binary(), fun(), binary(), binary(),
  113. [{binary(), binary()}]) -> any().
  114. media_range_param_attr(Data, Fun, Type, SubType, Acc) ->
  115. token_ci(Data,
  116. fun (_Rest, <<>>) -> {error, badarg};
  117. (<< $=, Rest/bits >>, Attr) ->
  118. media_range_param_value(Rest, Fun, Type, SubType, Acc, Attr)
  119. end).
  120. -spec media_range_param_value(binary(), fun(), binary(), binary(),
  121. [{binary(), binary()}], binary()) -> any().
  122. media_range_param_value(Data, Fun, Type, SubType, Acc, <<"q">>) ->
  123. qvalue(Data,
  124. fun (Rest, Quality) ->
  125. accept_ext(Rest, Fun, Type, SubType, Acc, Quality, [])
  126. end);
  127. media_range_param_value(Data, Fun, Type, SubType, Acc, Attr) ->
  128. word(Data,
  129. fun (Rest, Value) ->
  130. media_range_params(Rest, Fun,
  131. Type, SubType, [{Attr, Value}|Acc])
  132. end).
  133. %% @doc Parse a media type.
  134. -spec media_type(binary(), fun()) -> any().
  135. media_type(Data, Fun) ->
  136. token_ci(Data,
  137. fun (_Rest, <<>>) -> {error, badarg};
  138. (<< $/, Rest/bits >>, Type) ->
  139. token_ci(Rest,
  140. fun (_Rest2, <<>>) -> {error, badarg};
  141. (Rest2, SubType) -> Fun(Rest2, Type, SubType)
  142. end);
  143. (_Rest, _Type) -> {error, badarg}
  144. end).
  145. -spec accept_ext(binary(), fun(), binary(), binary(),
  146. [{binary(), binary()}], 0..1000,
  147. [{binary(), binary()} | binary()]) -> any().
  148. accept_ext(Data, Fun, Type, SubType, Params, Quality, Acc) ->
  149. whitespace(Data,
  150. fun (<< $;, Rest/bits >>) ->
  151. whitespace(Rest,
  152. fun (Rest2) ->
  153. accept_ext_attr(Rest2, Fun,
  154. Type, SubType, Params, Quality, Acc)
  155. end);
  156. (Rest) ->
  157. Fun(Rest, {{Type, SubType, lists:reverse(Params)},
  158. Quality, lists:reverse(Acc)})
  159. end).
  160. -spec accept_ext_attr(binary(), fun(), binary(), binary(),
  161. [{binary(), binary()}], 0..1000,
  162. [{binary(), binary()} | binary()]) -> any().
  163. accept_ext_attr(Data, Fun, Type, SubType, Params, Quality, Acc) ->
  164. token_ci(Data,
  165. fun (_Rest, <<>>) -> {error, badarg};
  166. (<< $=, Rest/bits >>, Attr) ->
  167. accept_ext_value(Rest, Fun, Type, SubType, Params,
  168. Quality, Acc, Attr);
  169. (Rest, Attr) ->
  170. accept_ext(Rest, Fun, Type, SubType, Params,
  171. Quality, [Attr|Acc])
  172. end).
  173. -spec accept_ext_value(binary(), fun(), binary(), binary(),
  174. [{binary(), binary()}], 0..1000,
  175. [{binary(), binary()} | binary()], binary()) -> any().
  176. accept_ext_value(Data, Fun, Type, SubType, Params, Quality, Acc, Attr) ->
  177. word(Data,
  178. fun (Rest, Value) ->
  179. accept_ext(Rest, Fun,
  180. Type, SubType, Params, Quality, [{Attr, Value}|Acc])
  181. end).
  182. %% @doc Parse a conneg header (Accept-Charset, Accept-Encoding),
  183. %% followed by an optional quality value.
  184. -spec conneg(binary(), fun()) -> any().
  185. conneg(Data, Fun) ->
  186. token_ci(Data,
  187. fun (_Rest, <<>>) -> {error, badarg};
  188. (Rest, Conneg) ->
  189. maybe_qparam(Rest,
  190. fun (Rest2, Quality) ->
  191. Fun(Rest2, {Conneg, Quality})
  192. end)
  193. end).
  194. %% @doc Parse a language range, followed by an optional quality value.
  195. -spec language_range(binary(), fun()) -> any().
  196. language_range(<< $*, Rest/bits >>, Fun) ->
  197. language_range_ret(Rest, Fun, '*');
  198. language_range(Data, Fun) ->
  199. language_tag(Data,
  200. fun (Rest, LanguageTag) ->
  201. language_range_ret(Rest, Fun, LanguageTag)
  202. end).
  203. -spec language_range_ret(binary(), fun(), '*' | {binary(), [binary()]}) -> any().
  204. language_range_ret(Data, Fun, LanguageTag) ->
  205. maybe_qparam(Data,
  206. fun (Rest, Quality) ->
  207. Fun(Rest, {LanguageTag, Quality})
  208. end).
  209. -spec language_tag(binary(), fun()) -> any().
  210. language_tag(Data, Fun) ->
  211. alpha(Data,
  212. fun (_Rest, Tag) when byte_size(Tag) =:= 0; byte_size(Tag) > 8 ->
  213. {error, badarg};
  214. (<< $-, Rest/bits >>, Tag) ->
  215. language_subtag(Rest, Fun, Tag, []);
  216. (Rest, Tag) ->
  217. Fun(Rest, Tag)
  218. end).
  219. -spec language_subtag(binary(), fun(), binary(), [binary()]) -> any().
  220. language_subtag(Data, Fun, Tag, Acc) ->
  221. alpha(Data,
  222. fun (_Rest, SubTag) when byte_size(SubTag) =:= 0;
  223. byte_size(SubTag) > 8 -> {error, badarg};
  224. (<< $-, Rest/bits >>, SubTag) ->
  225. language_subtag(Rest, Fun, Tag, [SubTag|Acc]);
  226. (Rest, SubTag) ->
  227. %% Rebuild the full tag now that we know it's correct
  228. Sub = << << $-, S/binary >> || S <- lists:reverse([SubTag|Acc]) >>,
  229. Fun(Rest, << Tag/binary, Sub/binary >>)
  230. end).
  231. -spec maybe_qparam(binary(), fun()) -> any().
  232. maybe_qparam(Data, Fun) ->
  233. whitespace(Data,
  234. fun (<< $;, Rest/bits >>) ->
  235. whitespace(Rest,
  236. fun (Rest2) ->
  237. qparam(Rest2, Fun)
  238. end);
  239. (Rest) ->
  240. Fun(Rest, 1000)
  241. end).
  242. %% @doc Parse a quality parameter string (for example q=0.500).
  243. -spec qparam(binary(), fun()) -> any().
  244. qparam(<< Q, $=, Data/bits >>, Fun) when Q =:= $q; Q =:= $Q ->
  245. qvalue(Data, Fun).
  246. %% @doc Parse either a list of entity tags or a "*".
  247. -spec entity_tag_match(binary()) -> any().
  248. entity_tag_match(<< $*, Rest/bits >>) ->
  249. whitespace(Rest,
  250. fun (<<>>) -> '*';
  251. (_Any) -> {error, badarg}
  252. end);
  253. entity_tag_match(Data) ->
  254. nonempty_list(Data, fun entity_tag/2).
  255. %% @doc Parse an entity-tag.
  256. -spec entity_tag(binary(), fun()) -> any().
  257. entity_tag(<< "W/", Rest/bits >>, Fun) ->
  258. opaque_tag(Rest, Fun, weak);
  259. entity_tag(Data, Fun) ->
  260. opaque_tag(Data, Fun, strong).
  261. -spec opaque_tag(binary(), fun(), weak | strong) -> any().
  262. opaque_tag(Data, Fun, Strength) ->
  263. quoted_string(Data,
  264. fun (_Rest, <<>>) -> {error, badarg};
  265. (Rest, OpaqueTag) -> Fun(Rest, {Strength, OpaqueTag})
  266. end).
  267. %% @doc Parse an HTTP date (RFC1123, RFC850 or asctime date).
  268. %% @end
  269. %%
  270. %% While this may not be the most efficient date parsing we can do,
  271. %% it should work fine for our purposes because all HTTP dates should
  272. %% be sent as RFC1123 dates in HTTP/1.1.
  273. -spec http_date(binary()) -> any().
  274. http_date(Data) ->
  275. case rfc1123_date(Data) of
  276. {error, badarg} ->
  277. case rfc850_date(Data) of
  278. {error, badarg} ->
  279. case asctime_date(Data) of
  280. {error, badarg} ->
  281. {error, badarg};
  282. HTTPDate ->
  283. HTTPDate
  284. end;
  285. HTTPDate ->
  286. HTTPDate
  287. end;
  288. HTTPDate ->
  289. HTTPDate
  290. end.
  291. %% @doc Parse an RFC1123 date.
  292. -spec rfc1123_date(binary()) -> any().
  293. rfc1123_date(Data) ->
  294. wkday(Data,
  295. fun (<< ", ", Rest/bits >>, _WkDay) ->
  296. date1(Rest,
  297. fun (<< " ", Rest2/bits >>, Date) ->
  298. time(Rest2,
  299. fun (<< " GMT", Rest3/bits >>, Time) ->
  300. http_date_ret(Rest3, {Date, Time});
  301. (_Any, _Time) ->
  302. {error, badarg}
  303. end);
  304. (_Any, _Date) ->
  305. {error, badarg}
  306. end);
  307. (_Any, _WkDay) ->
  308. {error, badarg}
  309. end).
  310. %% @doc Parse an RFC850 date.
  311. -spec rfc850_date(binary()) -> any().
  312. %% From the RFC:
  313. %% HTTP/1.1 clients and caches SHOULD assume that an RFC-850 date
  314. %% which appears to be more than 50 years in the future is in fact
  315. %% in the past (this helps solve the "year 2000" problem).
  316. rfc850_date(Data) ->
  317. weekday(Data,
  318. fun (<< ", ", Rest/bits >>, _WeekDay) ->
  319. date2(Rest,
  320. fun (<< " ", Rest2/bits >>, Date) ->
  321. time(Rest2,
  322. fun (<< " GMT", Rest3/bits >>, Time) ->
  323. http_date_ret(Rest3, {Date, Time});
  324. (_Any, _Time) ->
  325. {error, badarg}
  326. end);
  327. (_Any, _Date) ->
  328. {error, badarg}
  329. end);
  330. (_Any, _WeekDay) ->
  331. {error, badarg}
  332. end).
  333. %% @doc Parse an asctime date.
  334. -spec asctime_date(binary()) -> any().
  335. asctime_date(Data) ->
  336. wkday(Data,
  337. fun (<< " ", Rest/bits >>, _WkDay) ->
  338. date3(Rest,
  339. fun (<< " ", Rest2/bits >>, PartialDate) ->
  340. time(Rest2,
  341. fun (<< " ", Rest3/bits >>, Time) ->
  342. asctime_year(Rest3,
  343. PartialDate, Time);
  344. (_Any, _Time) ->
  345. {error, badarg}
  346. end);
  347. (_Any, _PartialDate) ->
  348. {error, badarg}
  349. end);
  350. (_Any, _WkDay) ->
  351. {error, badarg1}
  352. end).
  353. -spec asctime_year(binary(), tuple(), tuple()) -> any().
  354. asctime_year(<< Y1, Y2, Y3, Y4, Rest/bits >>, {Month, Day}, Time)
  355. when Y1 >= $0, Y1 =< $9, Y2 >= $0, Y2 =< $9,
  356. Y3 >= $0, Y3 =< $9, Y4 >= $0, Y4 =< $9 ->
  357. Year = (Y1 - $0) * 1000 + (Y2 - $0) * 100 + (Y3 - $0) * 10 + (Y4 - $0),
  358. http_date_ret(Rest, {{Year, Month, Day}, Time}).
  359. -spec http_date_ret(binary(), tuple()) -> any().
  360. http_date_ret(Data, DateTime = {Date, _Time}) ->
  361. whitespace(Data,
  362. fun (<<>>) ->
  363. case calendar:valid_date(Date) of
  364. true -> DateTime;
  365. false -> {error, badarg}
  366. end;
  367. (_Any) ->
  368. {error, badarg}
  369. end).
  370. %% We never use it, pretty much just checks the wkday is right.
  371. -spec wkday(binary(), fun()) -> any().
  372. wkday(<< WkDay:3/binary, Rest/bits >>, Fun)
  373. when WkDay =:= <<"Mon">>; WkDay =:= <<"Tue">>; WkDay =:= <<"Wed">>;
  374. WkDay =:= <<"Thu">>; WkDay =:= <<"Fri">>; WkDay =:= <<"Sat">>;
  375. WkDay =:= <<"Sun">> ->
  376. Fun(Rest, WkDay);
  377. wkday(_Any, _Fun) ->
  378. {error, badarg}.
  379. %% We never use it, pretty much just checks the weekday is right.
  380. -spec weekday(binary(), fun()) -> any().
  381. weekday(<< "Monday", Rest/binary >>, Fun) ->
  382. Fun(Rest, <<"Monday">>);
  383. weekday(<< "Tuesday", Rest/binary >>, Fun) ->
  384. Fun(Rest, <<"Tuesday">>);
  385. weekday(<< "Wednesday", Rest/binary >>, Fun) ->
  386. Fun(Rest, <<"Wednesday">>);
  387. weekday(<< "Thursday", Rest/binary >>, Fun) ->
  388. Fun(Rest, <<"Thursday">>);
  389. weekday(<< "Friday", Rest/binary >>, Fun) ->
  390. Fun(Rest, <<"Friday">>);
  391. weekday(<< "Saturday", Rest/binary >>, Fun) ->
  392. Fun(Rest, <<"Saturday">>);
  393. weekday(<< "Sunday", Rest/binary >>, Fun) ->
  394. Fun(Rest, <<"Sunday">>);
  395. weekday(_Any, _Fun) ->
  396. {error, badarg}.
  397. -spec date1(binary(), fun()) -> any().
  398. date1(<< D1, D2, " ", M:3/binary, " ", Y1, Y2, Y3, Y4, Rest/bits >>, Fun)
  399. when D1 >= $0, D1 =< $9, D2 >= $0, D2 =< $9,
  400. Y1 >= $0, Y1 =< $9, Y2 >= $0, Y2 =< $9,
  401. Y3 >= $0, Y3 =< $9, Y4 >= $0, Y4 =< $9 ->
  402. case month(M) of
  403. {error, badarg} ->
  404. {error, badarg};
  405. Month ->
  406. Fun(Rest, {
  407. (Y1 - $0) * 1000 + (Y2 - $0) * 100 + (Y3 - $0) * 10 + (Y4 - $0),
  408. Month,
  409. (D1 - $0) * 10 + (D2 - $0)
  410. })
  411. end;
  412. date1(_Data, _Fun) ->
  413. {error, badarg}.
  414. -spec date2(binary(), fun()) -> any().
  415. date2(<< D1, D2, "-", M:3/binary, "-", Y1, Y2, Rest/bits >>, Fun)
  416. when D1 >= $0, D1 =< $9, D2 >= $0, D2 =< $9,
  417. Y1 >= $0, Y1 =< $9, Y2 >= $0, Y2 =< $9 ->
  418. case month(M) of
  419. {error, badarg} ->
  420. {error, badarg};
  421. Month ->
  422. Year = (Y1 - $0) * 10 + (Y2 - $0),
  423. Year2 = case Year > 50 of
  424. true -> Year + 1900;
  425. false -> Year + 2000
  426. end,
  427. Fun(Rest, {
  428. Year2,
  429. Month,
  430. (D1 - $0) * 10 + (D2 - $0)
  431. })
  432. end;
  433. date2(_Data, _Fun) ->
  434. {error, badarg}.
  435. -spec date3(binary(), fun()) -> any().
  436. date3(<< M:3/binary, " ", D1, D2, Rest/bits >>, Fun)
  437. when (D1 >= $0 andalso D1 =< $3) orelse D1 =:= $\s,
  438. D2 >= $0, D2 =< $9 ->
  439. case month(M) of
  440. {error, badarg} ->
  441. {error, badarg};
  442. Month ->
  443. Day = case D1 of
  444. $\s -> D2 - $0;
  445. D1 -> (D1 - $0) * 10 + (D2 - $0)
  446. end,
  447. Fun(Rest, {Month, Day})
  448. end;
  449. date3(_Data, _Fun) ->
  450. {error, badarg}.
  451. -spec month(<< _:24 >>) -> 1..12 | {error, badarg}.
  452. month(<<"Jan">>) -> 1;
  453. month(<<"Feb">>) -> 2;
  454. month(<<"Mar">>) -> 3;
  455. month(<<"Apr">>) -> 4;
  456. month(<<"May">>) -> 5;
  457. month(<<"Jun">>) -> 6;
  458. month(<<"Jul">>) -> 7;
  459. month(<<"Aug">>) -> 8;
  460. month(<<"Sep">>) -> 9;
  461. month(<<"Oct">>) -> 10;
  462. month(<<"Nov">>) -> 11;
  463. month(<<"Dec">>) -> 12;
  464. month(_Any) -> {error, badarg}.
  465. -spec time(binary(), fun()) -> any().
  466. time(<< H1, H2, ":", M1, M2, ":", S1, S2, Rest/bits >>, Fun)
  467. when H1 >= $0, H1 =< $2, H2 >= $0, H2 =< $9,
  468. M1 >= $0, M1 =< $5, M2 >= $0, M2 =< $9,
  469. S1 >= $0, S1 =< $5, S2 >= $0, S2 =< $9 ->
  470. Hour = (H1 - $0) * 10 + (H2 - $0),
  471. case Hour < 24 of
  472. true ->
  473. Time = {
  474. Hour,
  475. (M1 - $0) * 10 + (M2 - $0),
  476. (S1 - $0) * 10 + (S2 - $0)
  477. },
  478. Fun(Rest, Time);
  479. false ->
  480. {error, badarg}
  481. end.
  482. %% @doc Skip whitespace.
  483. -spec whitespace(binary(), fun()) -> any().
  484. whitespace(<< C, Rest/bits >>, Fun)
  485. when C =:= $\s; C =:= $\t ->
  486. whitespace(Rest, Fun);
  487. whitespace(Data, Fun) ->
  488. Fun(Data).
  489. %% @doc Parse a list of digits as a non negative integer.
  490. -spec digits(binary()) -> non_neg_integer() | {error, badarg}.
  491. digits(Data) ->
  492. digits(Data,
  493. fun (Rest, I) ->
  494. whitespace(Rest,
  495. fun (<<>>) ->
  496. I;
  497. (_Rest2) ->
  498. {error, badarg}
  499. end)
  500. end).
  501. -spec digits(binary(), fun()) -> any().
  502. digits(<< C, Rest/bits >>, Fun)
  503. when C >= $0, C =< $9 ->
  504. digits(Rest, Fun, C - $0);
  505. digits(_Data, _Fun) ->
  506. {error, badarg}.
  507. -spec digits(binary(), fun(), non_neg_integer()) -> any().
  508. digits(<< C, Rest/bits >>, Fun, Acc)
  509. when C >= $0, C =< $9 ->
  510. digits(Rest, Fun, Acc * 10 + (C - $0));
  511. digits(Data, Fun, Acc) ->
  512. Fun(Data, Acc).
  513. %% @doc Parse a list of case-insensitive alpha characters.
  514. %%
  515. %% Changes all characters to lowercase.
  516. -spec alpha(binary(), fun()) -> any().
  517. alpha(Data, Fun) ->
  518. alpha(Data, Fun, <<>>).
  519. -spec alpha(binary(), fun(), binary()) -> any().
  520. alpha(<<>>, Fun, Acc) ->
  521. Fun(<<>>, Acc);
  522. alpha(<< C, Rest/bits >>, Fun, Acc)
  523. when C >= $a andalso C =< $z;
  524. C >= $A andalso C =< $Z ->
  525. C2 = cowboy_bstr:char_to_lower(C),
  526. alpha(Rest, Fun, << Acc/binary, C2 >>);
  527. alpha(Data, Fun, Acc) ->
  528. Fun(Data, Acc).
  529. %% @doc Parse either a token or a quoted string.
  530. -spec word(binary(), fun()) -> any().
  531. word(Data = << $", _/bits >>, Fun) ->
  532. quoted_string(Data, Fun);
  533. word(Data, Fun) ->
  534. token(Data,
  535. fun (_Rest, <<>>) -> {error, badarg};
  536. (Rest, Token) -> Fun(Rest, Token)
  537. end).
  538. %% @doc Parse a case-insensitive token.
  539. %%
  540. %% Changes all characters to lowercase.
  541. -spec token_ci(binary(), fun()) -> any().
  542. token_ci(Data, Fun) ->
  543. token(Data, Fun, ci, <<>>).
  544. %% @doc Parse a token.
  545. -spec token(binary(), fun()) -> any().
  546. token(Data, Fun) ->
  547. token(Data, Fun, cs, <<>>).
  548. -spec token(binary(), fun(), ci | cs, binary()) -> any().
  549. token(<<>>, Fun, _Case, Acc) ->
  550. Fun(<<>>, Acc);
  551. token(Data = << C, _Rest/bits >>, Fun, _Case, Acc)
  552. when C =:= $(; C =:= $); C =:= $<; C =:= $>; C =:= $@;
  553. C =:= $,; C =:= $;; C =:= $:; C =:= $\\; C =:= $";
  554. C =:= $/; C =:= $[; C =:= $]; C =:= $?; C =:= $=;
  555. C =:= ${; C =:= $}; C =:= $\s; C =:= $\t;
  556. C < 32; C =:= 127 ->
  557. Fun(Data, Acc);
  558. token(<< C, Rest/bits >>, Fun, Case = ci, Acc) ->
  559. C2 = cowboy_bstr:char_to_lower(C),
  560. token(Rest, Fun, Case, << Acc/binary, C2 >>);
  561. token(<< C, Rest/bits >>, Fun, Case, Acc) ->
  562. token(Rest, Fun, Case, << Acc/binary, C >>).
  563. %% @doc Parse a quoted string.
  564. -spec quoted_string(binary(), fun()) -> any().
  565. quoted_string(<< $", Rest/bits >>, Fun) ->
  566. quoted_string(Rest, Fun, <<>>).
  567. -spec quoted_string(binary(), fun(), binary()) -> any().
  568. quoted_string(<<>>, _Fun, _Acc) ->
  569. {error, badarg};
  570. quoted_string(<< $", Rest/bits >>, Fun, Acc) ->
  571. Fun(Rest, Acc);
  572. quoted_string(<< $\\, C, Rest/bits >>, Fun, Acc) ->
  573. quoted_string(Rest, Fun, << Acc/binary, C >>);
  574. quoted_string(<< C, Rest/bits >>, Fun, Acc) ->
  575. quoted_string(Rest, Fun, << Acc/binary, C >>).
  576. %% @doc Parse a quality value.
  577. -spec qvalue(binary(), fun()) -> any().
  578. qvalue(<< $0, $., Rest/bits >>, Fun) ->
  579. qvalue(Rest, Fun, 0, 100);
  580. qvalue(<< $0, Rest/bits >>, Fun) ->
  581. Fun(Rest, 0);
  582. qvalue(<< $1, $., $0, $0, $0, Rest/bits >>, Fun) ->
  583. Fun(Rest, 1000);
  584. qvalue(<< $1, $., $0, $0, Rest/bits >>, Fun) ->
  585. Fun(Rest, 1000);
  586. qvalue(<< $1, $., $0, Rest/bits >>, Fun) ->
  587. Fun(Rest, 1000);
  588. qvalue(<< $1, Rest/bits >>, Fun) ->
  589. Fun(Rest, 1000);
  590. qvalue(_Data, _Fun) ->
  591. {error, badarg}.
  592. -spec qvalue(binary(), fun(), integer(), 1 | 10 | 100) -> any().
  593. qvalue(Data, Fun, Q, 0) ->
  594. Fun(Data, Q);
  595. qvalue(<< C, Rest/bits >>, Fun, Q, M)
  596. when C >= $0, C =< $9 ->
  597. qvalue(Rest, Fun, Q + (C - $0) * M, M div 10);
  598. qvalue(Data, Fun, Q, _M) ->
  599. Fun(Data, Q).
  600. %% Interpretation.
  601. %% @doc Walk through a tokens list and return whether
  602. %% the connection is keepalive or closed.
  603. %%
  604. %% The connection token is expected to be lower-case.
  605. -spec connection_to_atom([binary()]) -> keepalive | close.
  606. connection_to_atom([]) ->
  607. keepalive;
  608. connection_to_atom([<<"keep-alive">>|_Tail]) ->
  609. keepalive;
  610. connection_to_atom([<<"close">>|_Tail]) ->
  611. close;
  612. connection_to_atom([_Any|Tail]) ->
  613. connection_to_atom(Tail).
  614. %% @doc Decode a URL encoded binary.
  615. %% @equiv urldecode(Bin, crash)
  616. -spec urldecode(binary()) -> binary().
  617. urldecode(Bin) when is_binary(Bin) ->
  618. urldecode(Bin, <<>>, crash).
  619. %% @doc Decode a URL encoded binary.
  620. %% The second argument specifies how to handle percent characters that are not
  621. %% followed by two valid hex characters. Use `skip' to ignore such errors,
  622. %% if `crash' is used the function will fail with the reason `badarg'.
  623. -spec urldecode(binary(), crash | skip) -> binary().
  624. urldecode(Bin, OnError) when is_binary(Bin) ->
  625. urldecode(Bin, <<>>, OnError).
  626. -spec urldecode(binary(), binary(), crash | skip) -> binary().
  627. urldecode(<<$%, H, L, Rest/binary>>, Acc, OnError) ->
  628. G = unhex(H),
  629. M = unhex(L),
  630. if G =:= error; M =:= error ->
  631. case OnError of skip -> ok; crash -> erlang:error(badarg) end,
  632. urldecode(<<H, L, Rest/binary>>, <<Acc/binary, $%>>, OnError);
  633. true ->
  634. urldecode(Rest, <<Acc/binary, (G bsl 4 bor M)>>, OnError)
  635. end;
  636. urldecode(<<$%, Rest/binary>>, Acc, OnError) ->
  637. case OnError of skip -> ok; crash -> erlang:error(badarg) end,
  638. urldecode(Rest, <<Acc/binary, $%>>, OnError);
  639. urldecode(<<$+, Rest/binary>>, Acc, OnError) ->
  640. urldecode(Rest, <<Acc/binary, $ >>, OnError);
  641. urldecode(<<C, Rest/binary>>, Acc, OnError) ->
  642. urldecode(Rest, <<Acc/binary, C>>, OnError);
  643. urldecode(<<>>, Acc, _OnError) ->
  644. Acc.
  645. -spec unhex(byte()) -> byte() | error.
  646. unhex(C) when C >= $0, C =< $9 -> C - $0;
  647. unhex(C) when C >= $A, C =< $F -> C - $A + 10;
  648. unhex(C) when C >= $a, C =< $f -> C - $a + 10;
  649. unhex(_) -> error.
  650. %% @doc URL encode a string binary.
  651. %% @equiv urlencode(Bin, [])
  652. -spec urlencode(binary()) -> binary().
  653. urlencode(Bin) ->
  654. urlencode(Bin, []).
  655. %% @doc URL encode a string binary.
  656. %% The `noplus' option disables the default behaviour of quoting space
  657. %% characters, `\s', as `+'. The `upper' option overrides the default behaviour
  658. %% of writing hex numbers using lowecase letters to using uppercase letters
  659. %% instead.
  660. -spec urlencode(binary(), [noplus|upper]) -> binary().
  661. urlencode(Bin, Opts) ->
  662. Plus = not proplists:get_value(noplus, Opts, false),
  663. Upper = proplists:get_value(upper, Opts, false),
  664. urlencode(Bin, <<>>, Plus, Upper).
  665. -spec urlencode(binary(), binary(), boolean(), boolean()) -> binary().
  666. urlencode(<<C, Rest/binary>>, Acc, P=Plus, U=Upper) ->
  667. if C >= $0, C =< $9 -> urlencode(Rest, <<Acc/binary, C>>, P, U);
  668. C >= $A, C =< $Z -> urlencode(Rest, <<Acc/binary, C>>, P, U);
  669. C >= $a, C =< $z -> urlencode(Rest, <<Acc/binary, C>>, P, U);
  670. C =:= $.; C =:= $-; C =:= $~; C =:= $_ ->
  671. urlencode(Rest, <<Acc/binary, C>>, P, U);
  672. C =:= $ , Plus ->
  673. urlencode(Rest, <<Acc/binary, $+>>, P, U);
  674. true ->
  675. H = C band 16#F0 bsr 4, L = C band 16#0F,
  676. H1 = if Upper -> tohexu(H); true -> tohexl(H) end,
  677. L1 = if Upper -> tohexu(L); true -> tohexl(L) end,
  678. urlencode(Rest, <<Acc/binary, $%, H1, L1>>, P, U)
  679. end;
  680. urlencode(<<>>, Acc, _Plus, _Upper) ->
  681. Acc.
  682. -spec tohexu(byte()) -> byte().
  683. tohexu(C) when C < 10 -> $0 + C;
  684. tohexu(C) when C < 17 -> $A + C - 10.
  685. -spec tohexl(byte()) -> byte().
  686. tohexl(C) when C < 10 -> $0 + C;
  687. tohexl(C) when C < 17 -> $a + C - 10.
  688. %% Tests.
  689. -ifdef(TEST).
  690. nonempty_charset_list_test_() ->
  691. %% {Value, Result}
  692. Tests = [
  693. {<<>>, {error, badarg}},
  694. {<<"iso-8859-5, unicode-1-1;q=0.8">>, [
  695. {<<"iso-8859-5">>, 1000},
  696. {<<"unicode-1-1">>, 800}
  697. ]}
  698. ],
  699. [{V, fun() -> R = nonempty_list(V, fun conneg/2) end} || {V, R} <- Tests].
  700. nonempty_language_range_list_test_() ->
  701. %% {Value, Result}
  702. Tests = [
  703. {<<"da, en-gb;q=0.8, en;q=0.7">>, [
  704. {<<"da">>, 1000},
  705. {<<"en-gb">>, 800},
  706. {<<"en">>, 700}
  707. ]},
  708. {<<"en, en-US, en-cockney, i-cherokee, x-pig-latin">>, [
  709. {<<"en">>, 1000},
  710. {<<"en-us">>, 1000},
  711. {<<"en-cockney">>, 1000},
  712. {<<"i-cherokee">>, 1000},
  713. {<<"x-pig-latin">>, 1000}
  714. ]}
  715. ],
  716. [{V, fun() -> R = nonempty_list(V, fun language_range/2) end}
  717. || {V, R} <- Tests].
  718. nonempty_token_list_test_() ->
  719. %% {Value, Result}
  720. Tests = [
  721. {<<>>, {error, badarg}},
  722. {<<" ">>, {error, badarg}},
  723. {<<" , ">>, {error, badarg}},
  724. {<<",,,">>, {error, badarg}},
  725. {<<"a b">>, {error, badarg}},
  726. {<<"a , , , ">>, [<<"a">>]},
  727. {<<" , , , a">>, [<<"a">>]},
  728. {<<"a, , b">>, [<<"a">>, <<"b">>]},
  729. {<<"close">>, [<<"close">>]},
  730. {<<"keep-alive, upgrade">>, [<<"keep-alive">>, <<"upgrade">>]}
  731. ],
  732. [{V, fun() -> R = nonempty_list(V, fun token/2) end} || {V, R} <- Tests].
  733. media_range_list_test_() ->
  734. %% {Tokens, Result}
  735. Tests = [
  736. {<<"audio/*; q=0.2, audio/basic">>, [
  737. {{<<"audio">>, <<"*">>, []}, 200, []},
  738. {{<<"audio">>, <<"basic">>, []}, 1000, []}
  739. ]},
  740. {<<"text/plain; q=0.5, text/html, "
  741. "text/x-dvi; q=0.8, text/x-c">>, [
  742. {{<<"text">>, <<"plain">>, []}, 500, []},
  743. {{<<"text">>, <<"html">>, []}, 1000, []},
  744. {{<<"text">>, <<"x-dvi">>, []}, 800, []},
  745. {{<<"text">>, <<"x-c">>, []}, 1000, []}
  746. ]},
  747. {<<"text/*, text/html, text/html;level=1, */*">>, [
  748. {{<<"text">>, <<"*">>, []}, 1000, []},
  749. {{<<"text">>, <<"html">>, []}, 1000, []},
  750. {{<<"text">>, <<"html">>, [{<<"level">>, <<"1">>}]}, 1000, []},
  751. {{<<"*">>, <<"*">>, []}, 1000, []}
  752. ]},
  753. {<<"text/*;q=0.3, text/html;q=0.7, text/html;level=1, "
  754. "text/html;level=2;q=0.4, */*;q=0.5">>, [
  755. {{<<"text">>, <<"*">>, []}, 300, []},
  756. {{<<"text">>, <<"html">>, []}, 700, []},
  757. {{<<"text">>, <<"html">>, [{<<"level">>, <<"1">>}]}, 1000, []},
  758. {{<<"text">>, <<"html">>, [{<<"level">>, <<"2">>}]}, 400, []},
  759. {{<<"*">>, <<"*">>, []}, 500, []}
  760. ]},
  761. {<<"text/html;level=1;quoted=\"hi hi hi\";"
  762. "q=0.123;standalone;complex=gits, text/plain">>, [
  763. {{<<"text">>, <<"html">>,
  764. [{<<"level">>, <<"1">>}, {<<"quoted">>, <<"hi hi hi">>}]}, 123,
  765. [<<"standalone">>, {<<"complex">>, <<"gits">>}]},
  766. {{<<"text">>, <<"plain">>, []}, 1000, []}
  767. ]}
  768. ],
  769. [{V, fun() -> R = list(V, fun media_range/2) end} || {V, R} <- Tests].
  770. entity_tag_match_test_() ->
  771. %% {Tokens, Result}
  772. Tests = [
  773. {<<"\"xyzzy\"">>, [{strong, <<"xyzzy">>}]},
  774. {<<"\"xyzzy\", W/\"r2d2xxxx\", \"c3piozzzz\"">>,
  775. [{strong, <<"xyzzy">>},
  776. {weak, <<"r2d2xxxx">>},
  777. {strong, <<"c3piozzzz">>}]},
  778. {<<"*">>, '*'}
  779. ],
  780. [{V, fun() -> R = entity_tag_match(V) end} || {V, R} <- Tests].
  781. http_date_test_() ->
  782. %% {Tokens, Result}
  783. Tests = [
  784. {<<"Sun, 06 Nov 1994 08:49:37 GMT">>, {{1994, 11, 6}, {8, 49, 37}}},
  785. {<<"Sunday, 06-Nov-94 08:49:37 GMT">>, {{1994, 11, 6}, {8, 49, 37}}},
  786. {<<"Sun Nov 6 08:49:37 1994">>, {{1994, 11, 6}, {8, 49, 37}}}
  787. ],
  788. [{V, fun() -> R = http_date(V) end} || {V, R} <- Tests].
  789. rfc1123_date_test_() ->
  790. %% {Tokens, Result}
  791. Tests = [
  792. {<<"Sun, 06 Nov 1994 08:49:37 GMT">>, {{1994, 11, 6}, {8, 49, 37}}}
  793. ],
  794. [{V, fun() -> R = rfc1123_date(V) end} || {V, R} <- Tests].
  795. rfc850_date_test_() ->
  796. %% {Tokens, Result}
  797. Tests = [
  798. {<<"Sunday, 06-Nov-94 08:49:37 GMT">>, {{1994, 11, 6}, {8, 49, 37}}}
  799. ],
  800. [{V, fun() -> R = rfc850_date(V) end} || {V, R} <- Tests].
  801. asctime_date_test_() ->
  802. %% {Tokens, Result}
  803. Tests = [
  804. {<<"Sun Nov 6 08:49:37 1994">>, {{1994, 11, 6}, {8, 49, 37}}}
  805. ],
  806. [{V, fun() -> R = asctime_date(V) end} || {V, R} <- Tests].
  807. connection_to_atom_test_() ->
  808. %% {Tokens, Result}
  809. Tests = [
  810. {[<<"close">>], close},
  811. {[<<"keep-alive">>], keepalive},
  812. {[<<"keep-alive">>, <<"upgrade">>], keepalive}
  813. ],
  814. [{lists:flatten(io_lib:format("~p", [T])),
  815. fun() -> R = connection_to_atom(T) end} || {T, R} <- Tests].
  816. content_type_test_() ->
  817. %% {ContentType, Result}
  818. Tests = [
  819. {<<"text/plain; charset=iso-8859-4">>,
  820. {<<"text">>, <<"plain">>, [{<<"charset">>, <<"iso-8859-4">>}]}},
  821. {<<"multipart/form-data \t;Boundary=\"MultipartIsUgly\"">>,
  822. {<<"multipart">>, <<"form-data">>, [
  823. {<<"boundary">>, <<"MultipartIsUgly">>}
  824. ]}},
  825. {<<"foo/bar; one=FirstParam; two=SecondParam">>,
  826. {<<"foo">>, <<"bar">>, [
  827. {<<"one">>, <<"FirstParam">>},
  828. {<<"two">>, <<"SecondParam">>}
  829. ]}}
  830. ],
  831. [{V, fun () -> R = content_type(V) end} || {V, R} <- Tests].
  832. digits_test_() ->
  833. %% {Digits, Result}
  834. Tests = [
  835. {<<"42 ">>, 42},
  836. {<<"69\t">>, 69},
  837. {<<"1337">>, 1337}
  838. ],
  839. [{V, fun() -> R = digits(V) end} || {V, R} <- Tests].
  840. urldecode_test_() ->
  841. U = fun urldecode/2,
  842. [?_assertEqual(<<" ">>, U(<<"%20">>, crash)),
  843. ?_assertEqual(<<" ">>, U(<<"+">>, crash)),
  844. ?_assertEqual(<<0>>, U(<<"%00">>, crash)),
  845. ?_assertEqual(<<255>>, U(<<"%fF">>, crash)),
  846. ?_assertEqual(<<"123">>, U(<<"123">>, crash)),
  847. ?_assertEqual(<<"%i5">>, U(<<"%i5">>, skip)),
  848. ?_assertEqual(<<"%5">>, U(<<"%5">>, skip)),
  849. ?_assertError(badarg, U(<<"%i5">>, crash)),
  850. ?_assertError(badarg, U(<<"%5">>, crash))
  851. ].
  852. urlencode_test_() ->
  853. U = fun urlencode/2,
  854. [?_assertEqual(<<"%ff%00">>, U(<<255,0>>, [])),
  855. ?_assertEqual(<<"%FF%00">>, U(<<255,0>>, [upper])),
  856. ?_assertEqual(<<"+">>, U(<<" ">>, [])),
  857. ?_assertEqual(<<"%20">>, U(<<" ">>, [noplus])),
  858. ?_assertEqual(<<"aBc">>, U(<<"aBc">>, [])),
  859. ?_assertEqual(<<".-~_">>, U(<<".-~_">>, [])),
  860. ?_assertEqual(<<"%ff+">>, urlencode(<<255, " ">>))
  861. ].
  862. -endif.