epgsql_binary.erl 14 KB


  1. %%% @doc
  2. %%% Interface to encoder/decoder for binary postgres data representation
  3. %%% @end
  4. %%% @see epgsql_codec
  5. %%% @see epgsql_wire
  6. %%% @end
  7. %%% Copyright (C) 2008 - Will Glozer. All rights reserved.
  8. %%% XXX: maybe merge this module into epgsql_codec?
  9. -module(epgsql_binary).
  10. -export([new_codec/2,
  11. update_codec/2,
  12. null/1,
  13. is_null/2,
  14. type_to_oid/2,
  15. typeinfo_to_name_array/2,
  16. typeinfo_to_oid_info/2,
  17. oid_to_name/2,
  18. oid_to_info/2,
  19. oid_to_decoder/3,
  20. decode/2, encode/3, supports/2]).
  21. %% Composite type decoders
  22. -export([decode_record/3, decode_array/3]).
  23. -export_type([codec/0, decoder/0]).
  24. -include("protocol.hrl").
  25. -define(DEFAULT_NULLS, [null, undefined]).
  %% Codec state handed to every encode/decode entry point of this module.
  %% `nulls' lists the Erlang terms treated as SQL NULL: null/1 returns its
  %% first element and is_null/2 tests membership. `oid_db' is the type
  %% database that is queried through `epgsql_oid_db'.
  26. -record(codec,
  27. {opts = #{} :: opts(), % not used yet
  28. nulls = ?DEFAULT_NULLS :: nulls(),
  29. oid_db :: epgsql_oid_db:db()}).
  %% State carried by decode_array/3: `element_decoder' is applied to every
  %% non-NULL element, `null_term' is substituted for elements whose length
  %% field is -1.
  30. -record(array_decoder,
  31. {element_decoder :: decoder(),
  32. null_term :: any() }).
  %% Accumulator threaded through encode_array_dims/2: `n_dims' counts the
  %% dimensions found so far, `lengths' collects one length per dimension
  %% (encode_array/3 reverses it before writing), `has_null' records whether a
  %% NULL element was seen and `codec' is used for the is_null/2 check.
  33. -record(array_encoder,
  34. {element_encoder :: epgsql_codec:codec_entry(),
  35. n_dims = 0 :: non_neg_integer(),
  36. lengths = [] :: [non_neg_integer()],
  37. has_null = false :: boolean(),
  38. codec :: codec()}).
  %% Non-empty list of terms that stand for SQL NULL.
  39. -type nulls() :: [any(), ...].
  %% Options accepted by new_codec/2; only `nulls' is read from it.
  40. -type opts() :: #{nulls => nulls()}.
  41. -opaque codec() :: #codec{}.
  %% A decoder is a {Fun, TypeName, State} triple; decode/2 applies it as
  %% Fun(Binary, TypeName, State).
  42. -opaque decoder() :: {fun((binary(), epgsql:type_name(), epgsql_codec:codec_state()) -> any()),
  43. epgsql:type_name(),
  44. epgsql_codec:codec_state()}.
  %% A scalar type name, or {array, Name} for an array of that type.
  45. -type type() :: epgsql:type_name() | {array, epgsql:type_name()}.
  %% Result of oid_to_name/2: a known type, or the raw OID tagged as unknown.
  46. -type maybe_unknown_type() :: type() | {unknown_oid, epgsql_oid_db:oid()}.
  %% OIDs that oid_to_decoder/3 handles itself (binary format only): the first
  %% is decoded with decode_record/3, the second as an array of such records.
  47. -define(RECORD_OID, 2249).
  48. -define(RECORD_ARRAY_OID, 2287).
  %% @doc Create a codec from the built-in codec modules and OID table.
  %% A codec converts data (result rows and query parameters) between Erlang
  %% terms and PostgreSQL formats, using the mappings between OIDs, type names
  %% and `epgsql_codec_*' modules that are kept in `epgsql_oid_db'.
  -spec new_codec(epgsql_sock:pg_sock(), opts()) -> codec().
  new_codec(PgSock, Opts) ->
      new_codec(PgSock, default_codecs(), default_oids(), Opts).
  %% Build the codec record: initialise the codec modules, join them with the
  %% OID entries into the OID database and pick up the `nulls' option
  %% (falling back to ?DEFAULT_NULLS when it is absent).
  new_codec(PgSock, Codecs, Oids, Opts) ->
      Entries = epgsql_codec:init_mods(Codecs, PgSock),
      TypeInfos = epgsql_oid_db:join_codecs_oids(Oids, Entries),
      OidDb = epgsql_oid_db:from_list(TypeInfos),
      Nulls = maps:get(nulls, Opts, ?DEFAULT_NULLS),
      #codec{opts = Opts, nulls = Nulls, oid_db = OidDb}.
  %% @doc Return `Codec' with `TypeInfos' merged into its OID database by
  %% `epgsql_oid_db:update/2'. All other codec fields are left untouched.
  -spec update_codec([epgsql_oid_db:type_info()], codec()) -> codec().
  update_codec(TypeInfos, Codec = #codec{oid_db = OidDb0}) ->
      OidDb1 = epgsql_oid_db:update(TypeInfos, OidDb0),
      Codec#codec{oid_db = OidDb1}.
  %% @doc Translate an OID into a type name.
  %% Returns `{unknown_oid, Oid}' when the OID is not in the codec's database,
  %% `{array, Name}' for array types and the bare `Name' otherwise.
  -spec oid_to_name(epgsql_oid_db:oid(), codec()) -> maybe_unknown_type().
  oid_to_name(Oid, Codec) ->
      case oid_to_info(Oid, Codec) of
          undefined ->
              {unknown_oid, Oid};
          TypeInfo ->
              %% Same name/array mapping as used for already looked-up types.
              typeinfo_to_name_array(TypeInfo, Codec)
      end.
  %% @doc Return the term used to represent `NULL' in decoded data, i.e. the
  %% head of the codec's `nulls' list.
  -spec null(codec()) -> any().
  null(#codec{nulls = [Preferred | _Alternatives]}) ->
      Preferred.
  %% @doc Return `true' when `Value' is one of the terms the codec accepts as
  %% a representation of `NULL', `false' otherwise.
  -spec is_null(any(), codec()) -> boolean().
  is_null(Value, #codec{nulls = NullTerms}) ->
      lists:member(Value, NullTerms).
  %% @doc Look up the OID of a type given by name; `{array, Name}' selects the
  %% OID of the array type whose elements are `Name'.
  -spec type_to_oid(type(), codec()) -> epgsql_oid_db:oid().
  type_to_oid({array, ElementName}, Codec) ->
      type_to_oid(ElementName, true, Codec);
  type_to_oid(ScalarName, Codec) ->
      type_to_oid(ScalarName, false, Codec).
  %% Resolve a type name plus array flag to an OID using the codec's OID
  %% database.
  -spec type_to_oid(epgsql:type_name(), boolean(), codec()) -> epgsql_oid_db:oid().
  type_to_oid(Name, IsArray, #codec{oid_db = OidDb}) ->
      epgsql_oid_db:oid_by_name(Name, IsArray, OidDb).
  %% Fetch the type info for a type given by name; `{array, Name}' selects the
  %% array variant of `Name'.
  -spec type_to_type_info(type(), codec()) -> epgsql_oid_db:type_info() | undefined.
  type_to_type_info({array, ElementName}, Codec) ->
      type_to_info(ElementName, true, Codec);
  type_to_type_info(ScalarName, Codec) ->
      type_to_info(ScalarName, false, Codec).
  %% @doc Look up the type info registered for `Oid' in the codec's OID
  %% database; the result is whatever `epgsql_oid_db:find_by_oid/2' returns.
  -spec oid_to_info(epgsql_oid_db:oid(), codec()) -> epgsql_oid_db:type_info() | undefined.
  oid_to_info(Oid, #codec{oid_db = OidDb}) ->
      epgsql_oid_db:find_by_oid(Oid, OidDb).
  %% Look up type info by type name and array flag in the codec's OID database.
  -spec type_to_info(epgsql:type_name(), boolean(), codec()) -> epgsql_oid_db:type_info().
  type_to_info(Name, IsArray, #codec{oid_db = OidDb}) ->
      epgsql_oid_db:find_by_name(Name, IsArray, OidDb).
  %% @doc Convert a type info into its name form: `{array, Name}' for array
  %% types, `Name' otherwise. An `{unknown_oid, Oid}' tuple is passed through
  %% unchanged. The second argument is ignored.
  -spec typeinfo_to_name_array(Unknown | epgsql_oid_db:type_info(), _) -> Unknown | type() when
        Unknown :: {unknown_oid, epgsql_oid_db:oid()}.
  typeinfo_to_name_array({unknown_oid, _} = Unknown, _) ->
      Unknown;
  typeinfo_to_name_array(TypeInfo, _) ->
      case epgsql_oid_db:type_to_oid_info(TypeInfo) of
          {_, ElementName, true} -> {array, ElementName};
          {_, ScalarName, false} -> ScalarName
      end.
  %% @doc Convert a type info into its OID info as produced by
  %% `epgsql_oid_db:type_to_oid_info/1'. An `{unknown_oid, Oid}' tuple is
  %% passed through unchanged. The second argument is ignored.
  -spec typeinfo_to_oid_info(Unknown | epgsql_oid_db:type_info(), _) ->
            Unknown | epgsql_oid_db:oid_info() when
        Unknown :: {unknown_oid, epgsql_oid_db:oid()}.
  typeinfo_to_oid_info({unknown_oid, _} = Unknown, _) ->
      Unknown;
  typeinfo_to_oid_info(KnownType, _) ->
      epgsql_oid_db:type_to_oid_info(KnownType).
  117. %%
  118. %% Decode
  119. %%
  %% @doc Decode a single cell by applying the decoder's function to the
  %% binary together with the type name and state stored in the decoder.
  -spec decode(binary(), decoder()) -> any().
  decode(Bin, {DecodeFun, TypeName, CodecState}) ->
      DecodeFun(Bin, TypeName, CodecState).
  %% @doc Build the decoder for values of the type identified by `Oid'.
  %% In binary format, records and arrays of records are handled by this
  %% module itself. OIDs missing from the codec's database get the no-op
  %% decoder matching `Format'; every other OID gets the decoder of its
  %% registered codec module.
  -spec oid_to_decoder(epgsql_oid_db:oid(), binary | text, codec()) -> decoder().
  oid_to_decoder(?RECORD_OID, binary, Codec) ->
      {fun ?MODULE:decode_record/3, record, Codec};
  oid_to_decoder(?RECORD_ARRAY_OID, binary, Codec) ->
      RecordDecoder = oid_to_decoder(?RECORD_OID, binary, Codec),
      ArrayState = #array_decoder{element_decoder = RecordDecoder,
                                  null_term = null(Codec)},
      {fun ?MODULE:decode_array/3, array, ArrayState};
  oid_to_decoder(Oid, Format, #codec{oid_db = OidDb} = Codec) ->
      case {epgsql_oid_db:find_by_oid(Oid, OidDb), Format} of
          {undefined, binary} ->
              {fun epgsql_codec_noop:decode/3, undefined, []};
          {undefined, text} ->
              {fun epgsql_codec_noop:decode_text/3, undefined, []};
          {TypeInfo, _} ->
              make_decoder(TypeInfo, Format, Codec)
      end.
  %% Build a decoder from a known type info. The type name reported by the
  %% codec entry must equal the one in the OID info, otherwise the second
  %% match fails.
  -spec make_decoder(epgsql_oid_db:type_info(), binary | text, codec()) -> decoder().
  make_decoder(TypeInfo, Format, Codec) ->
      {TypeName, CodecMod, CodecState} = epgsql_oid_db:type_to_codec_entry(TypeInfo),
      {_, TypeName, IsArray} = epgsql_oid_db:type_to_oid_info(TypeInfo),
      make_decoder(TypeName, CodecMod, CodecState, Codec, Format, IsArray).
  %% Select the decoder for one (Format, IsArray) combination.
  %% Binary scalar: the codec module's decode/3.
  make_decoder(TypeName, CodecMod, CodecState, _Codec, binary, false) ->
      {fun CodecMod:decode/3, TypeName, CodecState};
  %% Binary array: decode_array/3 wrapping the codec module's decode/3.
  make_decoder(TypeName, CodecMod, CodecState, Codec, binary, true) ->
      ElementDecoder = {fun CodecMod:decode/3, TypeName, CodecState},
      {fun ?MODULE:decode_array/3, array,
       #array_decoder{element_decoder = ElementDecoder,
                      null_term = null(Codec)}};
  %% Text scalar: decode_text/3 is an optional callback, so fall back to the
  %% no-op decoder when the codec module does not export it.
  make_decoder(TypeName, CodecMod, CodecState, _Codec, text, false) ->
      case erlang:function_exported(CodecMod, decode_text, 3) of
          false ->
              {fun epgsql_codec_noop:decode_text/3, undefined, []};
          true ->
              {fun CodecMod:decode_text/3, TypeName, CodecState}
      end;
  %% Text array: no attempt is made to decode it.
  make_decoder(_TypeName, _CodecMod, _CodecState, _Codec, text, true) ->
      {fun epgsql_codec_noop:decode_text/3, undefined, []}.
  %% Array decoding
  %% See $PG$/src/backend/utils/adt/arrayfuncs.c
  %%
  %% @doc Decode an array in PostgreSQL binary format into a (nested) list.
  %% Binary layout:
  %%   4 bytes: number of dimensions
  %%   4 bytes: flags
  %%   4 bytes: element OID (should be the same as in the column spec)
  %%   (4 bytes: number of elements, 4 bytes: lower bound) per dimension
  %%   element data of dynamic size
  %% The lower bound of each dimension is ignored, see
  %% https://www.postgresql.org/docs/current/static/arrays.html#arrays-io
  %% The second argument is ignored. The whole binary must be consumed,
  %% otherwise the match on the decoding result fails.
  -spec decode_array(binary(), any(), #array_decoder{}) -> list().
  decode_array(<<NDims:?int32, _Flags:?int32, _ElemOid:?int32, Body/binary>>, _, ArrayDecoder) ->
      %% Each dimension contributes two 4-byte integers to the header.
      DimsBytes = NDims * 2 * 4,
      {DimsBin, ElementsBin} = erlang:split_binary(Body, DimsBytes),
      Lengths = [Length || <<Length:?int32, _LowerBound:?int32>> <= DimsBin],
      {Array, <<>>} = decode_array1(ElementsBin, Lengths, ArrayDecoder),
      Array.
  %% Decode the element data for the given list of dimension lengths.
  %% Returns {DecodedList, UnconsumedBinary}.
  decode_array1(Bin, [], _ArrayDecoder) ->
      %% No dimensions at all: an empty array, nothing is consumed.
      {[], Bin};
  decode_array1(Bin, [Count], ArrayDecoder) ->
      %% Innermost (or only) dimension: a flat run of `Count' elements.
      decode_elements(Bin, [], Count, ArrayDecoder);
  decode_array1(Bin, [Count | InnerLengths], ArrayDecoder) ->
      %% Outer dimension: decode `Count' sub-arrays one after another,
      %% threading the remaining binary through.
      DecodeSub = fun(_Index, Remaining) ->
                          decode_array1(Remaining, InnerLengths, ArrayDecoder)
                  end,
      lists:mapfoldl(DecodeSub, Bin, lists:seq(1, Count)).
  %% Decode `Remaining' consecutive array elements from the binary.
  %% Each element is a 4-byte length followed by that many bytes of data; a
  %% length of -1 marks NULL and carries no data. Returns the decoded elements
  %% in order together with the unconsumed binary.
  decode_elements(Tail, Acc, 0, _ArrayDecoder) ->
      {lists:reverse(Acc), Tail};
  decode_elements(<<-1:?int32, Tail/binary>>, Acc, Remaining,
                  #array_decoder{null_term = NullTerm} = ArrayDecoder) ->
      decode_elements(Tail, [NullTerm | Acc], Remaining - 1, ArrayDecoder);
  decode_elements(<<Size:?int32, Raw:Size/binary, Tail/binary>>, Acc, Remaining,
                  #array_decoder{element_decoder = ElementDecoder} = ArrayDecoder) ->
      Decoded = decode(Raw, ElementDecoder),
      decode_elements(Tail, [Decoded | Acc], Remaining - 1, ArrayDecoder).
  %% Record decoding
  %% See $PG$/src/backend/utils/adt/rowtypes.c
  %%
  %% @doc Decode a record in PostgreSQL binary format into a tuple.
  %% The binary starts with a 4-byte field count followed by the fields; each
  %% field is decoded with the decoder registered for its own OID and NULL
  %% fields are replaced by the codec's null term.
  -spec decode_record(binary(), record, codec()) -> tuple().
  decode_record(<<FieldCount:?int32, Fields/binary>>, record, Codec) ->
      list_to_tuple(decode_record1(Fields, FieldCount, Codec)).
  %% Decode the remaining `Left' record fields into a list.
  %% Every field is a 4-byte OID and a 4-byte length followed by the value
  %% bytes; a length of -1 marks NULL and carries no value bytes. The binary
  %% must be exhausted exactly when the field counter reaches zero.
  decode_record1(<<>>, 0, _Codec) ->
      [];
  decode_record1(<<_FieldOid:?int32, -1:?int32, Tail/binary>>, Left, Codec) ->
      [null(Codec) | decode_record1(Tail, Left - 1, Codec)];
  decode_record1(<<FieldOid:?int32, Size:?int32, Raw:Size/binary, Tail/binary>>, Left, Codec) ->
      Field = decode(Raw, oid_to_decoder(FieldOid, binary, Codec)),
      [Field | decode_record1(Tail, Left - 1, Codec)].
  209. %%
  210. %% Encode
  211. %%
  %% @doc Encode an Erlang value into PostgreSQL binary format for the type
  %% given by name (`{array, Name}' for arrays). The type is first resolved to
  %% its type info through the codec.
  -spec encode(epgsql:type_name() | {array, epgsql:type_name()}, any(), codec()) -> iolist().
  encode(TypeName, Value, Codec) ->
      encode_with_type(type_to_type_info(TypeName, Codec), Value, Codec).
  %% Encode `Value' according to an already resolved type info: array types
  %% go through encode_array/3 with the element codec entry, everything else
  %% through encode_value/2.
  encode_with_type(TypeInfo, Value, Codec) ->
      CodecEntry = epgsql_oid_db:type_to_codec_entry(TypeInfo),
      case epgsql_oid_db:type_to_oid_info(TypeInfo) of
          {_Oid, _, false} ->
              encode_value(Value, CodecEntry);
          {_ArrayOid, _, true} ->
              %FIXME: check if this OID is the same as was returned by 'Describe'
              ElementOid = epgsql_oid_db:type_to_element_oid(TypeInfo),
              Encoder = #array_encoder{element_encoder = CodecEntry,
                                       codec = Codec},
              encode_array(Value, ElementOid, Encoder)
      end.
  %% Encode one value with the given codec entry and prefix the payload with
  %% its size in bytes as a 4-byte integer.
  encode_value(Value, {TypeName, CodecMod, CodecState}) ->
      Encoded = epgsql_codec:encode(CodecMod, Value, TypeName, CodecState),
      Size = iolist_size(Encoded),
      [<<Size:?int32>> | Encoded].
  %% Encode a (nested) list as an array whose elements have type `Oid'.
  %% The number of dimensions is not known up front: it is discovered at
  %% encode time by inspecting the nesting of the data. As a consequence an
  %% array whose elements are themselves lists (e.g. strings) cannot be
  %% encoded.
  encode_array(Array, Oid, ArrayEncoder) ->
      {Elements, {NDims, RevLengths, HasNull}} = encode_array_dims(Array, ArrayEncoder),
      %% One {length, lower bound} pair per dimension; the lower bound is
      %% always written as 1.
      DimHeaders = [<<Len:?int32, 1:?int32>> || Len <- lists:reverse(RevLengths)],
      NullFlag = case HasNull of
                     false -> 0;
                     true -> 1
                 end,
      Body = [<<NDims:?int32, NullFlag:?int32, Oid:?int32>>, DimHeaders, Elements],
      [<<(iolist_size(Body)):?int32>> | Body].
  %% Encode the elements of a (nested) list and work out the array shape.
  %% Returns {EncodedData, {NDims, Lengths, HasNull}} where `Lengths' holds
  %% one entry per dimension, innermost first.
  %%
  %% Empty list: nothing to encode, the shape found so far is returned as is.
  encode_array_dims([], #array_encoder{n_dims = NDims,
                                       lengths = Lengths,
                                       has_null = HasNull}) ->
      {[], {NDims, Lengths, HasNull}};
  %% First element is not a list: this is the innermost dimension, so encode
  %% each element (or the -1 NULL marker) while counting them.
  encode_array_dims([First | _] = Elements,
                    #array_encoder{n_dims = NDims,
                                   lengths = Lengths,
                                   has_null = SeenNull,
                                   codec = Codec,
                                   element_encoder = ElemEncoder}) when not is_list(First) ->
      EncodeOne =
          fun(Element, {Len, NullSoFar}) ->
                  case is_null(Element, Codec) of
                      true ->
                          {<<-1:?int32>>, {Len + 1, true}};
                      false ->
                          {encode_value(Element, ElemEncoder), {Len + 1, NullSoFar}}
                  end
          end,
      {Data, {Count, AnyNull}} = lists:mapfoldl(EncodeOne, {0, SeenNull}, Elements),
      {Data, {NDims + 1, [Count | Lengths], AnyNull}};
  %% Otherwise this is an outer dimension: record its length and encode every
  %% sub-array starting from the same shape, carrying only the NULL flag from
  %% one sub-array to the next.
  encode_array_dims(SubArrays, #array_encoder{lengths = Lengths,
                                              n_dims = NDims,
                                              has_null = SeenNull} = Encoder) ->
      OuterLengths = [length(SubArrays) | Lengths],
      EncodeSub =
          fun(SubArray, {_PrevNDims, _PrevLengths, NullSoFar}) ->
                  encode_array_dims(SubArray,
                                    Encoder#array_encoder{n_dims = NDims,
                                                          has_null = NullSoFar,
                                                          lengths = OuterLengths})
          end,
      {Data, {InnerNDims, AllLengths, AnyNull}} =
          lists:mapfoldl(EncodeSub, {NDims, OuterLengths, SeenNull}, SubArrays),
      {Data, {InnerNDims + 1, AllLengths, AnyNull}}.
  %% Supports
  %%
  %% @doc Tell whether values of the type identified by `Oid' are known to
  %% this codec. The record and record-array OIDs are always reported as
  %% supported; any other OID is supported only when it is present in the
  %% codec's OID database.
  -spec supports(epgsql_oid_db:oid(), codec()) -> boolean().
  supports(RecOid, _) when RecOid == ?RECORD_OID; RecOid == ?RECORD_ARRAY_OID ->
      true;
  supports(Oid, #codec{oid_db = OidDb}) ->
      epgsql_oid_db:find_by_oid(Oid, OidDb) =/= undefined.
  %% Default codec set: every module is paired with an empty option list.
  %% The hstore and postgis codecs are deliberately not part of this set.
  %% XXX: maybe move to application env?
  -spec default_codecs() -> [{epgsql_codec:codec_mod(), any()}].
  default_codecs() ->
      Modules = [epgsql_codec_boolean,
                 epgsql_codec_bpchar,
                 epgsql_codec_datetime,
                 epgsql_codec_float,
                 epgsql_codec_geometric,
                 epgsql_codec_integer,
                 epgsql_codec_intrange,
                 epgsql_codec_json,
                 epgsql_codec_net,
                 epgsql_codec_text,
                 epgsql_codec_timerange,
                 epgsql_codec_uuid],
      [{Module, []} || Module <- Modules].
  %% Built-in OID table handed to new_codec/4, sorted by type name.
  %% Each entry presumably reads {TypeName, TypeOid, ArrayTypeOid} - confirm
  %% against epgsql_oid_db:oid_entry(). The geometry and hstore types are
  %% deliberately absent from this table.
  -spec default_oids() -> [epgsql_oid_db:oid_entry()].
  default_oids() ->
      [{bool,        16,   1000}, {bpchar,      1042, 1014},
       {bytea,       17,   1001}, {char,        18,   1002},
       {cidr,        650,  651},  {date,        1082, 1182},
       {daterange,   3912, 3913}, {float4,      700,  1021},
       {float8,      701,  1022}, {inet,        869,  1041},
       {int2,        21,   1005}, {int4,        23,   1007},
       {int4range,   3904, 3905}, {int8,        20,   1016},
       {int8range,   3926, 3927}, {interval,    1186, 1187},
       {json,        114,  199},  {jsonb,       3802, 3807},
       {macaddr,     829,  1040}, {macaddr8,    774,  775},
       {point,       600,  1017}, {text,        25,   1009},
       {time,        1083, 1183}, {timestamp,   1114, 1115},
       {timestamptz, 1184, 1185}, {timetz,      1266, 1270},
       {tsrange,     3908, 3909}, {tstzrange,   3910, 3911},
       {uuid,        2950, 2951}, {varchar,     1043, 1015}].