bert.d 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531
  1. import std.stdio;
  2. public{
  3. import std.bigint; // *10-15 times more slow than gmp but this works -- gmp-d conflicts with other deps ((
  4. import std.conv;
  5. /*
  6. // example std.bigint usage
  7. //BigInt num2 = 1;
  8. //auto num1 = 1;
  9. auto num1 = "1";
  10. BigInt num2 = BigInt(num1);
  11. num2 += 2;
  12. string num3 = num2.to!string;
  13. writefln("num2 = %s - %s", num2, typeof(num2).stringof);
  14. writefln("num3 = %s - %s", num3, typeof(num3).stringof);
  15. BigInt num = 0;
  16. for(ulong i = 1; i < 4_000_001; i++){
  17. num += i;
  18. }
  19. writefln("SUM(1, 4_000_000) = %s", num);
  20. num = 1;
  21. for(ubyte j = 1; j < 101; j++){
  22. num *= j;
  23. }
  24. writefln("PRODUCT(1, 100) = %s", num);
  25. */
  26. }
  27. private{
  28. import std.bitmanip;
  29. import std.string;
  30. import std.utf;
  31. import std.math;
  32. import std.traits;
  33. import std.algorithm;
  34. import std.range;
  35. import std.digest : toHexString;
  36. import std.array : appender;
  37. import std.format : format;
  38. }
  39. enum BERT_TAG : ubyte{
  40. VERSION = 131,
  41. SMALL_INT = 97,
  42. INT = 98,
  43. BIGINT = 110,
  44. //LARGE_BIG = 111,
  45. FLOAT = 70,
  46. ATOM = 118,
  47. TUPLE = 104, // SMALL_TUPLE
  48. LARGE_TUPLE = 105,
  49. NIL = 106,
  50. LIST = 108,
  51. BINARY = 109, // use BINARY as STRING
  52. MAP = 116
  53. }
  54. enum BertType{
  55. Int,
  56. BigInt,
  57. Float,
  58. Atom,
  59. Tuple,
  60. List,
  61. Binary,
  62. Map,
  63. Nil
  64. }
  65. struct BertValue{
  66. BertType type_;
  67. union{
  68. long intValue;
  69. BigInt bigintValue;
  70. double floatValue;
  71. string atomValue;
  72. BertValue[] tupleValue;
  73. BertValue[] listValue;
  74. ubyte[] binaryValue;
  75. BertValue[BertValue] mapValue;
  76. }
  77. ubyte[] encode() const{
  78. final switch(type_){
  79. case BertType.Int:
  80. return encodeInt(intValue);
  81. case BertType.BigInt:
  82. return encodeBigInt(bigintValue);
  83. case BertType.Float:
  84. return encodeFloat(floatValue);
  85. case BertType.Atom:
  86. return encodeAtom(atomValue);
  87. case BertType.Tuple:
  88. return encodeTuple(tupleValue.dup);
  89. case BertType.List:
  90. return encodeList(listValue.dup);
  91. case BertType.Binary:
  92. return encodeBinary(binaryValue.dup);
  93. case BertType.Map:
  94. return encodeMap(mapValue.dup);
  95. case BertType.Nil:
  96. return [cast(ubyte)BERT_TAG.NIL];
  97. }
  98. }
  99. string toString() const{
  100. final switch(type_){
  101. case BertType.Int:
  102. return to!string(intValue);
  103. case BertType.BigInt:
  104. return bigintValue.to!string;
  105. case BertType.Float:
  106. return to!string(floatValue);
  107. case BertType.Atom:
  108. return format("'%s'", atomValue);
  109. case BertType.Tuple:
  110. return "{" ~ tupleValue.map!(e => e.toString()).join(", ") ~ "}";
  111. case BertType.List:
  112. return "[" ~ listValue.map!(e => e.toString()).join(", ") ~ "]";
  113. case BertType.Binary:
  114. auto result = appender!string();
  115. result.put("<<");
  116. foreach(i, b; binaryValue){
  117. if(i > 0){ result.put(","); }
  118. result.put(to!string(b));
  119. }
  120. result.put(">>");
  121. return result.data;
  122. case BertType.Map:
  123. string[] pairs;
  124. foreach(key, value; mapValue){
  125. pairs ~= format("%s: %s", key.toString(), value.toString());
  126. }
  127. return "#{" ~ pairs.join(", ") ~ "}";
  128. case BertType.Nil:
  129. return "[]";
  130. }
  131. }
  132. }
  133. ubyte[] bertEncode(BertValue term){
  134. return [cast(ubyte)BERT_TAG.VERSION] ~ term.encode();
  135. }
  136. ubyte[] encodeInt(ubyte value){ // small_integer - uint8
  137. return [cast(ubyte)BERT_TAG.SMALL_INT, value];
  138. }
  139. ubyte[] encodeInt(ushort value){ // integer - uint16
  140. if(value <= 255) {
  141. return [cast(ubyte)BERT_TAG.SMALL_INT, cast(ubyte)value];
  142. }else{
  143. ubyte[4] bytes = nativeToBigEndian!int(cast(int)value);
  144. return [cast(ubyte)BERT_TAG.INT] ~ bytes[];
  145. }
  146. }
  147. ubyte[] encodeInt(uint value){ // integer - uint32
  148. if(value <= 255){
  149. return [cast(ubyte)BERT_TAG.SMALL_INT, cast(ubyte)value];
  150. }else if(value <= 2147483647) {
  151. ubyte[4] bytes = nativeToBigEndian!int(cast(int)value);
  152. return [cast(ubyte)BERT_TAG.INT] ~ bytes[];
  153. }else{
  154. return encodeBigInt( cast(ulong)value );
  155. }
  156. }
  157. ubyte[] encodeInt(ulong value){ // integer - small_big_int - uint64 ;; we do not use erlang large_big_int because small_big_int max value = 2^(8*255) - 1 ;; it is too enaught
  158. if(value <= 255){
  159. return [cast(ubyte)BERT_TAG.SMALL_INT, cast(ubyte)value];
  160. }else if(value <= 2147483647){
  161. ubyte[4] bytes = nativeToBigEndian!int(cast(int)value);
  162. return [cast(ubyte)BERT_TAG.INT] ~ bytes[];
  163. }else{
  164. return encodeBigInt(value);
  165. }
  166. }
  167. ubyte[] encodeBigInt(ulong value){
  168. ubyte[8] temp;
  169. size_t len = 0;
  170. while(value > 0){
  171. temp[len++] = cast(ubyte)(value & 0xFF);
  172. value >>= 8;
  173. }
  174. if(len == 0){
  175. return [ cast(ubyte)BERT_TAG.BIGINT, 1, 0, 0 ];
  176. }
  177. ubyte[] result = [
  178. cast(ubyte)BERT_TAG.BIGINT,
  179. cast(ubyte)len,
  180. 0
  181. ];
  182. foreach_reverse (i; 0..len){
  183. result ~= temp[i];
  184. }
  185. return result;
  186. }
  187. ubyte[] encodeBigInt(BigInt value){
  188. if(value == 0){
  189. return [cast(ubyte)BERT_TAG.BIGINT, 1, 0];
  190. }
  191. ubyte[] digits;
  192. while(value > 0){
  193. digits ~= cast(ubyte)(value & 0xFF);
  194. value >>= 8;
  195. }
  196. ubyte[] result;
  197. result = [ cast(ubyte)BERT_TAG.BIGINT, // we do not use erlang large_big_int because small_big_int max value = 2^(8*255) - 1 ;; it is too enaught
  198. cast(ubyte)digits.length,
  199. cast(ubyte)(0) // no sign
  200. ];
  201. result ~= digits; // in little-endian
  202. return result;
  203. }
  204. ubyte[] encodeFloat(double value){
  205. ubyte[8] bytes = nativeToBigEndian!double(value);
  206. return [cast(ubyte)BERT_TAG.FLOAT] ~ bytes[];
  207. }
  208. ubyte[] encodeAtom(string name){
  209. if(name.length > 255){ throw new Exception("Atom too long"); }
  210. return [
  211. cast(ubyte)BERT_TAG.ATOM,
  212. cast(ubyte)(name.length >> 8),
  213. cast(ubyte)(name.length & 0xFF)
  214. ] ~ cast(ubyte[])name;
  215. }
  216. ubyte[] encodeTuple(BertValue[] elements){
  217. ubyte[] result;
  218. if(elements.length <= 255){
  219. result = [cast(ubyte)BERT_TAG.TUPLE, cast(ubyte)elements.length];
  220. }else{
  221. result = [cast(ubyte)BERT_TAG.LARGE_TUPLE] ~ nativeToBigEndian!uint(cast(uint)elements.length);
  222. }
  223. foreach(elem; elements){
  224. result ~= elem.encode();
  225. }
  226. return result;
  227. }
  228. ubyte[] encodeList(BertValue[] elements){
  229. ubyte[4] lenBytes = nativeToBigEndian!uint(cast(uint)elements.length);
  230. return [cast(ubyte)BERT_TAG.LIST] ~ lenBytes[] ~ elements.map!(e => e.encode()).join() ~ cast(ubyte)BERT_TAG.NIL;
  231. }
  232. ubyte[] encodeBinary(ubyte[] data){
  233. ubyte[4] lenBytes = nativeToBigEndian!uint(cast(uint)data.length);
  234. return [cast(ubyte)BERT_TAG.BINARY] ~ lenBytes[] ~ data;
  235. }
  236. ubyte[] encodeMap(const BertValue[BertValue] map){
  237. ubyte[4] lenBytes = nativeToBigEndian!uint(cast(uint)map.length);
  238. return [cast(ubyte)BERT_TAG.MAP] ~ lenBytes[] ~ map.byPair.map!(p => p.key.encode() ~ p.value.encode()).join();
  239. }
  240. BertValue bertInt(ubyte value){
  241. BertValue v;
  242. v.type_ = BertType.Int;
  243. v.intValue = value;
  244. return v;
  245. }
  246. BertValue bertInt(ushort value){
  247. BertValue v;
  248. v.type_ = BertType.Int;
  249. v.intValue = value;
  250. return v;
  251. }
  252. BertValue bertInt(uint value){
  253. BertValue v;
  254. v.type_ = BertType.Int;
  255. v.intValue = value;
  256. return v;
  257. }
  258. BertValue bertInt(ulong value){
  259. BertValue v;
  260. v.type_ = BertType.Int;
  261. v.intValue = value;
  262. return v;
  263. }
  264. BertValue bertBigInt(BigInt value){
  265. BertValue v;
  266. v.type_ = BertType.BigInt;
  267. v.bigintValue = value;
  268. return v;
  269. }
  270. BertValue bertFloat(double value){
  271. BertValue v;
  272. v.type_ = BertType.Float;
  273. v.floatValue = value;
  274. return v;
  275. }
  276. BertValue bertAtom(string name){
  277. BertValue v;
  278. v.type_ = BertType.Atom;
  279. v.atomValue = name;
  280. return v;
  281. }
  282. BertValue bertBinary(ubyte[] data){
  283. BertValue v;
  284. v.type_ = BertType.Binary;
  285. v.binaryValue = data;
  286. return v;
  287. }
  288. BertValue bertList(BertValue[] elements){
  289. BertValue v;
  290. v.type_ = BertType.List;
  291. v.listValue = elements;
  292. return v;
  293. }
  294. BertValue bertTuple(BertValue[] elements){
  295. BertValue v;
  296. v.type_ = BertType.Tuple;
  297. v.tupleValue = elements;
  298. return v;
  299. }
  300. BertValue bertMap(BertValue[BertValue] map){
  301. BertValue v;
  302. v.type_ = BertType.Map;
  303. v.mapValue = map;
  304. return v;
  305. }
  306. BertValue bertNil(){
  307. BertValue v;
  308. v.type_ = BertType.Nil;
  309. return v;
  310. }
  311. struct BertDecoder{
  312. ubyte[] data;
  313. size_t pos;
  314. BertValue decode(){
  315. try{
  316. if(pos >= data.length){ throw new Exception("No data to decode"); }
  317. if(data[pos++] != BERT_TAG.VERSION){
  318. throw new Exception("Invalid BERT format: missing version byte");
  319. }
  320. return decodeValue();
  321. }catch (Exception e){
  322. writeln("Got error: ", e.msg);
  323. return bertNil();
  324. }
  325. }
  326. private BertValue decodeValue(){
  327. if(pos >= data.length){ throw new Exception("Unexpected end of data"); }
  328. ubyte tag = data[pos++];
  329. switch(tag){
  330. case BERT_TAG.SMALL_INT:
  331. if(pos >= data.length){ throw new Exception("Incomplete SMALL_INT"); }
  332. return bertInt( cast(ubyte)data[pos++] );
  333. case BERT_TAG.INT:
  334. if((pos + 4) > data.length){ throw new Exception("Incomplete INT"); }
  335. uint value = bigEndianToNative!uint(data[pos..pos+4][0..4]);
  336. pos += 4;
  337. if(value <= ubyte.max){ // maybe can to ubyte, ushort
  338. return bertInt( cast(ubyte)value );
  339. }else if(value <= ushort.max){
  340. return bertInt( cast(ushort)value );
  341. }else{ // uint
  342. return bertInt(value);
  343. }
  344. case BERT_TAG.FLOAT:
  345. if((pos + 8) > data.length){ throw new Exception("Incomplete FLOAT"); }
  346. double fvalue = bigEndianToNative!double(data[pos..pos+8][0..8]);
  347. pos += 8;
  348. return bertFloat(fvalue);
  349. case BERT_TAG.ATOM:
  350. if((pos + 2) > data.length){ throw new Exception("Incomplete ATOM length"); }
  351. ushort len = (cast(ushort)data[pos] << 8) | data[pos+1];
  352. pos += 2;
  353. if(pos + len > data.length){ throw new Exception("Incomplete ATOM data"); }
  354. string atom = cast(string)data[pos..pos+len];
  355. pos += len;
  356. return bertAtom(atom);
  357. case BERT_TAG.TUPLE:
  358. if(pos >= data.length){ throw new Exception("Incomplete TUPLE"); }
  359. ubyte arity = data[pos++];
  360. auto elements = new BertValue[arity];
  361. foreach(i; 0..arity){
  362. elements[i] = decodeValue();
  363. }
  364. return bertTuple(elements);
  365. case BERT_TAG.LIST:
  366. if((pos + 4) > data.length){ throw new Exception("Incomplete LIST length"); }
  367. uint len = bigEndianToNative!uint(data[pos..pos+4][0..4]);
  368. pos += 4;
  369. auto elements = new BertValue[len];
  370. foreach(i; 0..len){
  371. elements[i] = decodeValue();
  372. }
  373. if(pos >= data.length || data[pos++] != BERT_TAG.NIL){
  374. throw new Exception("Missing NIL terminator for LIST");
  375. }
  376. return bertList(elements);
  377. case BERT_TAG.BINARY:
  378. if((pos + 4) > data.length){ throw new Exception("Incomplete BINARY length"); }
  379. uint len = bigEndianToNative!uint(data[pos..pos+4][0..4]);
  380. pos += 4;
  381. if((pos + len) > data.length){ throw new Exception("Incomplete BINARY data"); }
  382. auto bin = bertBinary(data[pos..pos+len]);
  383. pos += len;
  384. return bin;
  385. case BERT_TAG.NIL:
  386. return bertNil();
  387. case BERT_TAG.BIGINT:
  388. return decodeBigInt();
  389. case BERT_TAG.MAP:
  390. if((pos + 4) > data.length){ throw new Exception("Incomplete MAP size"); }
  391. uint size = bigEndianToNative!uint(data[pos..pos+4][0..4]);
  392. pos += 4;
  393. BertValue[BertValue] map;
  394. foreach(i; 0..size){
  395. auto key = decodeValue();
  396. auto value = decodeValue();
  397. map[key] = value;
  398. }
  399. return bertMap(map);
  400. default:
  401. throw new Exception(format("Unknown BERT tag: 0x%x", tag));
  402. }
  403. }
  404. private BertValue decodeBigInt(){ // BIGINT, not use LARGE_BIG
  405. if(pos >= data.length){ throw new Exception("Incomplete BIGINT"); }
  406. uint n = data[pos++];
  407. if(pos >= data.length){ throw new Exception("Incomplete BIG sign"); }
  408. pos++; // skip sign
  409. if((pos + n) > data.length){ throw new Exception("Incomplete BIG data"); }
  410. if(n <= 8){ // maybe can to ubyte, ushort, uint, ulong
  411. if(n == 1){
  412. return bertInt( cast(ubyte)data[pos++] );
  413. }
  414. ulong value = 0;
  415. foreach(i; 0..n){
  416. value += cast(ulong)data[pos++] << (8 * i);
  417. }
  418. if(n == 2){
  419. return bertInt( cast(ushort)value );
  420. }else if( (n == 3) || (n == 4) ){
  421. return bertInt( cast(uint)value );
  422. }else{ // ( n == 5) || (n == 6) || (n == 7) || (n == 8)
  423. return bertInt(value);
  424. }
  425. }else{ // just bigint
  426. BigInt value = 0;
  427. foreach(i; 0..n){
  428. value += BigInt(data[pos++]) << (8 * i);
  429. }
  430. return bertBigInt(value);
  431. }
  432. }
  433. }