cow_link.erl 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445
  1. %% Copyright (c) 2019-2023, Loïc Hoguin <essen@ninenines.eu>
  2. %%
  3. %% Permission to use, copy, modify, and/or distribute this software for any
  4. %% purpose with or without fee is hereby granted, provided that the above
  5. %% copyright notice and this permission notice appear in all copies.
  6. %%
  7. %% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  8. %% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  9. %% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  10. %% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  11. %% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  12. %% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  13. %% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  14. -module(cow_link).
  15. -compile({no_auto_import, [link/1]}).
  16. -export([parse_link/1]).
  17. -export([resolve_link/2]).
  18. -export([resolve_link/3]).
  19. -export([link/1]).
  20. -include("cow_inline.hrl").
  21. -include("cow_parse.hrl").
  %% A single link of a link header: the target URI reference as found
  %% in the header, the relation type(s) and the remaining attributes.
  -type link() :: #{
      target := binary(),
      rel := binary(),
      attributes := [{binary(), binary()}]
  }.
  -export_type([link/0]).

  %% Options accepted by resolve_link/3.
  -type resolve_opts() :: #{
      allow_anchor => boolean()
  }.

  %% A context URI, either parsed or unparsed; undefined when there is none.
  -type uri() ::
      uri_string:uri_map()
      | uri_string:uri_string()
      | undefined.
  %% Parse the value of a link header into a list of links.
  %%
  %% The target of each link is returned exactly as it appears in
  %% the header. Relative URIs must then be resolved as per RFC3986 5.
  %% In some cases it might not be possible to resolve URIs, for
  %% example when the link header is returned with a 404 status code.
  -spec parse_link(binary()) -> [link()].
  parse_link(Header) ->
      before_target(Header, []).
  %% Parser state: looking for the "<" that opens the next target.
  %% Links holds the links parsed so far, most recent first.
  before_target(<<>>, Links) ->
      lists:reverse(Links);
  before_target(<<$<, Rest/bits>>, Links) ->
      target(Rest, Links, <<>>);
  before_target(<<C, Rest/bits>>, Links) when ?IS_WS(C) ->
      before_target(Rest, Links).
  %% Parser state: inside "<...>". Every byte up to the closing ">"
  %% is appended to the target as is.
  target(<<$>, Rest/bits>>, Links, Target) ->
      param_sep(Rest, Links, Target, []);
  target(<<C, Rest/bits>>, Links, Target) ->
      target(Rest, Links, <<Target/binary, C>>).
  %% Parser state: after a target or a parameter. The end of input or
  %% a "," completes the current link; a ";" introduces a parameter.
  %% Params holds the parameters of the current link, most recent first.
  param_sep(<<>>, Links, Target, Params) ->
      lists:reverse(acc_link(Links, Target, Params));
  param_sep(<<$,, Rest/bits>>, Links, Target, Params) ->
      before_target(Rest, acc_link(Links, Target, Params));
  param_sep(<<$;, Rest/bits>>, Links, Target, Params) ->
      before_param(Rest, Links, Target, Params);
  param_sep(<<C, Rest/bits>>, Links, Target, Params) when ?IS_WS(C) ->
      param_sep(Rest, Links, Target, Params).
  %% Parser state: after a ";", skipping whitespace until the first
  %% character of the parameter name.
  %%
  %% NOTE(review): ?LOWER comes from cow_inline.hrl and presumably reads
  %% the variable C implicitly; keep that variable name in this clause.
  before_param(<<C, Rest/bits>>, Links, Target, Params) when ?IS_WS(C) ->
      before_param(Rest, Links, Target, Params);
  before_param(<<C, Rest/bits>>, Links, Target, Params) when ?IS_TOKEN(C) ->
      ?LOWER(param, Rest, Links, Target, Params, <<>>).
  %% Parser state: inside a parameter name. An "=" ends the name and
  %% starts either a quoted value or a token value.
  %%
  %% NOTE(review): ?LOWER comes from cow_inline.hrl and presumably reads
  %% the variable C implicitly; keep that variable name in the last clause.
  param(<<$=, $", Rest/bits>>, Links, Target, Params, Name) ->
      quoted(Rest, Links, Target, Params, Name, <<>>);
  param(<<$=, C, Rest/bits>>, Links, Target, Params, Name) when ?IS_TOKEN(C) ->
      value(Rest, Links, Target, Params, Name, <<C>>);
  param(<<C, Rest/bits>>, Links, Target, Params, Name) when ?IS_TOKEN(C) ->
      ?LOWER(param, Rest, Links, Target, Params, Name).
  %% Parser state: inside a quoted parameter value. The closing quote
  %% stores the parameter. For a backslash escape only the escaped
  %% character is kept in the value.
  quoted(<<$", Rest/bits>>, Links, Target, Params, Name, Value) ->
      param_sep(Rest, Links, Target, [{Name, Value}|Params]);
  quoted(<<$\\, C, Rest/bits>>, Links, Target, Params, Name, Value)
          when ?IS_VCHAR_OBS(C) ->
      quoted(Rest, Links, Target, Params, Name, <<Value/binary, C>>);
  quoted(<<C, Rest/bits>>, Links, Target, Params, Name, Value)
          when ?IS_VCHAR_OBS(C) ->
      quoted(Rest, Links, Target, Params, Name, <<Value/binary, C>>).
  %% Parser state: inside an unquoted (token) parameter value. The first
  %% non-token byte, or the end of input, ends the value and stores
  %% the parameter; the remaining input is handed over untouched.
  value(<<C, Rest/bits>>, Links, Target, Params, Name, Value) when ?IS_TOKEN(C) ->
      value(Rest, Links, Target, Params, Name, <<Value/binary, C>>);
  value(Rest, Links, Target, Params, Name, Value) ->
      param_sep(Rest, Links, Target, [{Name, Value}|Params]).
  %% Build the link for Target out of its parameters (given most recent
  %% first) and push it in front of the links parsed so far.
  %% Crashes with a badmatch when there is no rel parameter.
  acc_link(Links, Target, ReversedParams) ->
      %% The rel parameter MUST be present. (RFC8288 3.3)
      {value, {_, Rel}, Others} =
          lists:keytake(<<"rel">>, 1, lists:reverse(ReversedParams)),
      %% Occurrences after the first MUST be ignored by parsers.
      Attributes = filter_out_duplicates(Others, #{}),
      Link = #{
          target => Target,
          rel => ?LOWER(Rel),
          attributes => Attributes
      },
      [Link|Links].
  %% Remove the duplicates of the attributes that don't allow them:
  %% only the first anchor, media, title, title* and type is kept.
  %% Any other attribute is kept every time it occurs. Seen records,
  %% as Flag => true, which single-use attributes were already kept.
  filter_out_duplicates([], _) ->
      [];
  filter_out_duplicates([Param|Tail], Seen) ->
      case single_use_flag(Param) of
          %% The "rel" is mandatory and was already removed from params.
          drop ->
              filter_out_duplicates(Tail, Seen);
          none ->
              [Param|filter_out_duplicates(Tail, Seen)];
          Flag ->
              case Seen of
                  #{Flag := true} ->
                      filter_out_duplicates(Tail, Seen);
                  _ ->
                      [Param|filter_out_duplicates(Tail, Seen#{Flag => true})]
              end
      end.

  %% Classify a parameter: drop for a leftover rel, the flag to record
  %% for a single-use attribute, none for everything else.
  single_use_flag({<<"rel">>, _}) -> drop;
  single_use_flag({<<"anchor">>, _}) -> anchor;
  single_use_flag({<<"media">>, _}) -> media;
  single_use_flag({<<"title">>, _}) -> title;
  single_use_flag({<<"title*">>, _}) -> title_star;
  single_use_flag({<<"type">>, _}) -> type;
  single_use_flag(_) -> none.
  -ifdef(TEST).
  %% One test per header: parse_link/1 must return exactly the expected links.
  parse_link_test_() ->
      %% Shorthand to build the expected link maps.
      Link = fun(Target, Rel, Attributes) ->
          #{target => Target, rel => Rel, attributes => Attributes}
      end,
      Cases = [
          {<<>>, []},
          {<<" ">>, []},
          %% Examples from the RFC.
          {<<"<http://example.com/TheBook/chapter2>; rel=\"previous\"; title=\"previous chapter\"">>, [
              Link(<<"http://example.com/TheBook/chapter2">>, <<"previous">>,
                  [{<<"title">>, <<"previous chapter">>}])
          ]},
          {<<"</>; rel=\"http://example.net/foo\"">>, [
              Link(<<"/">>, <<"http://example.net/foo">>, [])
          ]},
          {<<"</terms>; rel=\"copyright\"; anchor=\"#foo\"">>, [
              Link(<<"/terms">>, <<"copyright">>, [{<<"anchor">>, <<"#foo">>}])
          ]},
          %% @todo The RFC example using title* is not tested yet:
  %       {<<"</TheBook/chapter2>; rel=\"previous\"; title*=UTF-8'de'letztes%20Kapitel, "
  %           "</TheBook/chapter4>; rel=\"next\"; title*=UTF-8'de'n%c3%a4chstes%20Kapitel">>, [
  %           %% @todo
  %       ]}
          {<<"<http://example.org/>; rel=\"start http://example.net/relation/other\"">>, [
              Link(<<"http://example.org/">>, <<"start http://example.net/relation/other">>, [])
          ]},
          {<<"<https://example.org/>; rel=\"start\", "
              "<https://example.org/index>; rel=\"index\"">>, [
              Link(<<"https://example.org/">>, <<"start">>, []),
              Link(<<"https://example.org/index">>, <<"index">>, [])
          ]},
          %% Relation types are case insensitive.
          {<<"</>; rel=\"SELF\"">>, [
              Link(<<"/">>, <<"self">>, [])
          ]},
          {<<"</>; rel=\"HTTP://EXAMPLE.NET/FOO\"">>, [
              Link(<<"/">>, <<"http://example.net/foo">>, [])
          ]},
          %% Attribute names are case insensitive.
          {<<"</terms>; REL=\"copyright\"; ANCHOR=\"#foo\"">>, [
              Link(<<"/terms">>, <<"copyright">>, [{<<"anchor">>, <<"#foo">>}])
          ]}
      ],
      [{Header, fun() -> Expected = parse_link(Header) end}
          || {Header, Expected} <- Cases].
  -endif.
  %% Resolve a link based on the context URI, using the default options.
  -spec resolve_link(Link, uri()) -> Link | false when Link::link().
  resolve_link(Link, ContextURI) ->
      resolve_link(Link, ContextURI, #{}).

  %% Resolve a link based on the context URI and options.
  %%
  %% Returns the link with its target replaced by the resolved URI,
  %% or false when the link cannot or must not be resolved.
  -spec resolve_link(Link, uri(), resolve_opts()) -> Link | false when Link::link().
  %% When we do not have a context URI we only succeed when the target URI is absolute.
  %% The target URI will only be normalized in that case.
  resolve_link(Link=#{target := TargetURI}, undefined, _) ->
      case uri_string:parse(TargetURI) of
          Absolute = #{scheme := _} ->
              Link#{target => uri_string:normalize(Absolute)};
          _ ->
              false
      end;
  %% With a context URI, an anchor attribute becomes the base of the
  %% resolution after being resolved against the context URI itself.
  %% Anchors are allowed unless the allow_anchor option says otherwise.
  resolve_link(Link=#{attributes := Params}, ContextURI, Opts) ->
      AllowAnchor = maps:get(allow_anchor, Opts, true),
      case {AllowAnchor, lists:keyfind(<<"anchor">>, 1, Params)} of
          {_, false} ->
              do_resolve_link(Link, ContextURI);
          {true, {_, Anchor}} ->
              do_resolve_link(Link, resolve(Anchor, ContextURI));
          _ ->
              false
      end.
  %% Replace the target of the link with its resolution against
  %% ContextURI, recomposed from the resolved URI map.
  do_resolve_link(Link=#{target := TargetURI}, ContextURI) ->
      Resolved = resolve(TargetURI, ContextURI),
      Link#{target => uri_string:recompose(Resolved)}.
  -ifdef(TEST).
  %% One test per {Link, ContextURI, Opts, Expected} case, titled
  %% after the printed link.
  resolve_link_test_() ->
      Context = <<"http://a/c">>,
      NoAnchor = #{allow_anchor => false},
      FragAnchor = [{<<"anchor">>, <<"#frag">>}],
      PathAnchor = [{<<"anchor">>, <<"d/e">>}],
      Cases = [
          %% No context URI available.
          {#{target => <<"http://a/b/./c">>}, undefined, #{},
              #{target => <<"http://a/b/c">>}},
          {#{target => <<"a/b/./c">>}, undefined, #{},
              false},
          %% Context URI available, allow_anchor => true.
          {#{target => <<"http://a/b">>, attributes => []}, Context, #{},
              #{target => <<"http://a/b">>, attributes => []}},
          {#{target => <<"b">>, attributes => []}, Context, #{},
              #{target => <<"http://a/b">>, attributes => []}},
          {#{target => <<"b">>, attributes => FragAnchor}, Context, #{},
              #{target => <<"http://a/b">>, attributes => FragAnchor}},
          {#{target => <<"b">>, attributes => PathAnchor}, Context, #{},
              #{target => <<"http://a/d/b">>, attributes => PathAnchor}},
          %% Context URI available, allow_anchor => false.
          {#{target => <<"http://a/b">>, attributes => []}, Context, NoAnchor,
              #{target => <<"http://a/b">>, attributes => []}},
          {#{target => <<"b">>, attributes => []}, Context, NoAnchor,
              #{target => <<"http://a/b">>, attributes => []}},
          {#{target => <<"b">>, attributes => FragAnchor}, Context, NoAnchor,
              false},
          {#{target => <<"b">>, attributes => PathAnchor}, Context, NoAnchor,
              false}
      ],
      [{iolist_to_binary(io_lib:format("~0p", [Link])),
          fun() -> Expected = resolve_link(Link, ContextURI, Opts) end}
          || {Link, ContextURI, Opts, Expected} <- Cases].
  -endif.
  %% Resolve the URI reference URI against BaseURI and return the
  %% target URI as a map, with the dot segments of its path removed.
  %%
  %% @todo This function has been added to Erlang/OTP 22.3 as uri_string:resolve/2,3.
  resolve(URI, BaseURI) ->
      remove_dot_segments(resolve1(ensure_map_uri(URI), BaseURI)).

  %% We remove dot segments. Normalizing the entire URI
  %% will sometimes add an extra slash we don't want, so
  %% only the path goes through uri_string:normalize/2.
  remove_dot_segments(TargetURI = #{path := RawPath}) ->
      #{path := CleanPath} = uri_string:normalize(#{path => RawPath}, [return_map]),
      TargetURI#{path => CleanPath};
  remove_dot_segments(TargetURI) ->
      TargetURI.
  %% Transform a parsed URI reference into a target URI map by
  %% inheriting the missing components from the base URI, following
  %% RFC3986 5.2.2. Dot segments are NOT removed here.
  %%
  %% The base URI may be a map or an unparsed URI; it is only parsed
  %% when the reference needs it.
  %%
  %% A reference with a scheme is already a target URI.
  resolve1(URI=#{scheme := _}, _) ->
      URI;
  %% A reference with an authority only inherits the scheme.
  resolve1(URI=#{host := _}, BaseURI) ->
      #{scheme := Scheme} = ensure_map_uri(BaseURI),
      URI#{scheme => Scheme};
  %% An empty path inherits the base path, and also the base query
  %% unless the reference comes with its own query.
  resolve1(URI=#{path := <<>>}, BaseURI0) ->
      BaseURI = ensure_map_uri(BaseURI0),
      Inherited = case maps:is_key(query, URI) of
          true -> maps:with([path], BaseURI);
          false -> maps:with([path, query], BaseURI)
      end,
      maps:merge(URI, maps:merge(Inherited, base_authority(BaseURI)));
  %% An absolute path is kept as is.
  resolve1(URI=#{path := <<"/",_/bits>>}, BaseURI0) ->
      BaseURI = ensure_map_uri(BaseURI0),
      maps:merge(URI, base_authority(BaseURI));
  %% A relative path is merged with the base path.
  resolve1(URI=#{path := Path}, BaseURI0) ->
      BaseURI = ensure_map_uri(BaseURI0),
      maps:merge(
          URI#{path := merge_paths(Path, BaseURI)},
          base_authority(BaseURI)).

  %% The scheme and the whole authority of the base URI. The userinfo
  %% is part of the authority (RFC3986 3.2) and must be inherited along
  %% with the host and port, otherwise "http://user@host/" would
  %% silently lose its "user@" when resolving relative references.
  base_authority(BaseURI) ->
      maps:with([scheme, userinfo, host, port], BaseURI).

  %% Merge a relative path with the base path as per RFC3986 5.2.3.
  %%
  %% A base URI with an authority and an empty path behaves as if
  %% its path was "/".
  merge_paths(Path, #{host := _, path := <<>>}) ->
      <<$/, Path/binary>>;
  merge_paths(Path, #{path := BasePath0}) ->
      case string:split(BasePath0, <<$/>>, trailing) of
          %% Replace everything after the right-most slash.
          [BasePath, _] -> <<BasePath/binary, $/, Path/binary>>;
          %% No slash in the base path: the entire base path is
          %% excluded and the reference path is used unchanged.
          [_] -> Path
      end.

  %% Return the URI as a map, parsing it when it is not one already.
  ensure_map_uri(URI) when is_map(URI) -> URI;
  ensure_map_uri(URI) -> uri_string:parse(iolist_to_binary(URI)).
  -ifdef(TEST).
  %% One test per {Reference, Expected} case: the reference is resolved
  %% against the same base URI and recomposed into a binary.
  resolve_test_() ->
      Base = <<"http://a/b/c/d;p?q">>,
      Cases = [
          %% 5.4.1. Normal Examples
          {<<"g:h">>, <<"g:h">>},
          {<<"g">>, <<"http://a/b/c/g">>},
          {<<"./g">>, <<"http://a/b/c/g">>},
          {<<"g/">>, <<"http://a/b/c/g/">>},
          {<<"/g">>, <<"http://a/g">>},
          {<<"//g">>, <<"http://g">>},
          {<<"?y">>, <<"http://a/b/c/d;p?y">>},
          {<<"g?y">>, <<"http://a/b/c/g?y">>},
          {<<"#s">>, <<"http://a/b/c/d;p?q#s">>},
          {<<"g#s">>, <<"http://a/b/c/g#s">>},
          {<<"g?y#s">>, <<"http://a/b/c/g?y#s">>},
          {<<";x">>, <<"http://a/b/c/;x">>},
          {<<"g;x">>, <<"http://a/b/c/g;x">>},
          {<<"g;x?y#s">>, <<"http://a/b/c/g;x?y#s">>},
          {<<"">>, <<"http://a/b/c/d;p?q">>},
          {<<".">>, <<"http://a/b/c/">>},
          {<<"./">>, <<"http://a/b/c/">>},
          {<<"..">>, <<"http://a/b/">>},
          {<<"../">>, <<"http://a/b/">>},
          {<<"../g">>, <<"http://a/b/g">>},
          {<<"../..">>, <<"http://a/">>},
          {<<"../../">>, <<"http://a/">>},
          {<<"../../g">>, <<"http://a/g">>},
          %% 5.4.2. Abnormal Examples
          {<<"../../../g">>, <<"http://a/g">>},
          {<<"../../../../g">>, <<"http://a/g">>},
          {<<"/./g">>, <<"http://a/g">>},
          {<<"/../g">>, <<"http://a/g">>},
          {<<"g.">>, <<"http://a/b/c/g.">>},
          {<<".g">>, <<"http://a/b/c/.g">>},
          {<<"g..">>, <<"http://a/b/c/g..">>},
          {<<"..g">>, <<"http://a/b/c/..g">>},
          {<<"./../g">>, <<"http://a/b/g">>},
          {<<"./g/.">>, <<"http://a/b/c/g/">>},
          {<<"g/./h">>, <<"http://a/b/c/g/h">>},
          {<<"g/../h">>, <<"http://a/b/c/h">>},
          {<<"g;x=1/./y">>, <<"http://a/b/c/g;x=1/y">>},
          {<<"g;x=1/../y">>, <<"http://a/b/c/y">>},
          {<<"g?y/./x">>, <<"http://a/b/c/g?y/./x">>},
          {<<"g?y/../x">>, <<"http://a/b/c/g?y/../x">>},
          {<<"g#s/./x">>, <<"http://a/b/c/g#s/./x">>},
          {<<"g#s/../x">>, <<"http://a/b/c/g#s/../x">>},
          {<<"http:g">>, <<"http:g">>} %% for strict parsers
      ],
      [{Reference, fun() ->
          Expected = uri_string:recompose(resolve(Reference, Base))
      end} || {Reference, Expected} <- Cases].
  -endif.
  %% Build the value of a link header out of a list of links.
  %% The links are separated by ", " in the resulting iodata.
  -spec link([link()]) -> iodata().
  link(Links) ->
      lists:join(<<", ">>, lists:map(fun do_link/1, Links)).
  %% Build a single link: the target in angle brackets, then the rel,
  %% then the attributes. All values are written as quoted strings.
  %% The target and rel are written as is; only attribute values
  %% are escaped.
  do_link(#{target := TargetURI, rel := Rel, attributes := Params}) ->
      Attributes = [do_link_param(Key, Value) || {Key, Value} <- Params],
      [$<, TargetURI, <<">; rel=\"">>, Rel, $", Attributes].

  %% Build one attribute, including the "; " that separates it from
  %% what comes before.
  do_link_param(Key, Value) ->
      [<<"; ">>, Key, <<"=\"">>, escape(iolist_to_binary(Value), <<>>), $"].
  %% Escape a value for use inside a quoted string: a backslash is
  %% added in front of every backslash and double quote. The result
  %% is appended to Acc.
  escape(<<>>, Acc) ->
      Acc;
  escape(<<C, Rest/bits>>, Acc) when C =:= $\\; C =:= $" ->
      escape(Rest, <<Acc/binary, $\\, C>>);
  escape(<<C, Rest/bits>>, Acc) ->
      escape(Rest, <<Acc/binary, C>>).
  -ifdef(TEST).
  %% One test per {Expected, Links} case, titled after the printed
  %% links: link/1 must produce exactly the expected header value.
  link_test_() ->
      %% Shorthand to build the input link maps.
      Link = fun(Target, Rel, Attributes) ->
          #{target => Target, rel => Rel, attributes => Attributes}
      end,
      Cases = [
          {<<>>, []},
          %% Examples from the RFC.
          {<<"<http://example.com/TheBook/chapter2>; rel=\"previous\"; title=\"previous chapter\"">>, [
              Link(<<"http://example.com/TheBook/chapter2">>, <<"previous">>,
                  [{<<"title">>, <<"previous chapter">>}])
          ]},
          {<<"</>; rel=\"http://example.net/foo\"">>, [
              Link(<<"/">>, <<"http://example.net/foo">>, [])
          ]},
          {<<"</terms>; rel=\"copyright\"; anchor=\"#foo\"">>, [
              Link(<<"/terms">>, <<"copyright">>, [{<<"anchor">>, <<"#foo">>}])
          ]},
          %% @todo The RFC example using title* is not tested yet:
  %       {<<"</TheBook/chapter2>; rel=\"previous\"; title*=UTF-8'de'letztes%20Kapitel, "
  %           "</TheBook/chapter4>; rel=\"next\"; title*=UTF-8'de'n%c3%a4chstes%20Kapitel">>, [
  %           %% @todo
  %       ]}
          {<<"<http://example.org/>; rel=\"start http://example.net/relation/other\"">>, [
              Link(<<"http://example.org/">>, <<"start http://example.net/relation/other">>, [])
          ]},
          {<<"<https://example.org/>; rel=\"start\", "
              "<https://example.org/index>; rel=\"index\"">>, [
              Link(<<"https://example.org/">>, <<"start">>, []),
              Link(<<"https://example.org/index">>, <<"index">>, [])
          ]},
          %% Double quotes in attribute values are escaped.
          {<<"</>; rel=\"previous\"; quoted=\"name=\\\"value\\\"\"">>, [
              Link(<<"/">>, <<"previous">>, [{<<"quoted">>, <<"name=\"value\"">>}])
          ]}
      ],
      [{iolist_to_binary(io_lib:format("~0p", [Links])),
          fun() -> Expected = iolist_to_binary(link(Links)) end}
          || {Expected, Links} <- Cases].
  -endif.