1 
2 module markov.binary.encoder;
3 
4 import markov.chain;
5 import markov.counter;
6 import markov.serialize;
7 import markov.state;
8 
9 import std.algorithm;
10 import std.array;
11 import std.bitmanip;
12 import std.conv;
13 import std.outbuffer;
14 import std.range;
15 import std.stdio;
16 import std.traits;
17 
/**
 * Serializes a `MarkovChain!T` into a binary stream.
 *
 * Stream layout, as produced by the methods below:
 *   - a `uint` state count, followed by each state;
 *   - per state: `uint` size, `uint` key count, then for every key the
 *     token sequence followed by its counter;
 *   - per counter: `uint` entry count, then for every entry the token
 *     followed by a `uint` count;
 *   - per token sequence: `uint` token count, then each token.
 *
 * Numeric values are written in big-endian byte order. Every length is
 * narrowed to `uint` before being written.
 *
 * Params:
 *   T = token type; must satisfy `isEncodable!(T, ubyte[])`.
 */
struct BinaryEncoder(T)
if(isEncodable!(T, ubyte[]))
{
public:
    /**
     * Encodes `chain` into a freshly allocated `OutBuffer`.
     *
     * Returns: the bytes accumulated in the buffer.
     */
    ubyte[] encode()(ref MarkovChain!T chain)
    {
        auto buffer = new OutBuffer;
        encode(chain, buffer);
        return buffer.toBytes;
    }

    /**
     * Encodes `chain` into `output`.
     *
     * Params:
     *   chain  = the chain whose states are written.
     *   output = any output range accepting `ubyte`.
     */
    void encode(Range)(ref MarkovChain!T chain, ref Range output)
    if(isOutputRange!(Range, ubyte))
    {
        encodeStates(chain.states, output);
    }

    /**
     * Writes the number of states followed by each state in array order.
     */
    void encodeStates(Range)(State!T[] states, ref Range output)
    if(isOutputRange!(Range, ubyte))
    {
        encodeValue!(uint)(cast(uint) states.length, output);

        foreach(state; states)
        {
            encodeState(state, output);
        }
    }

    /**
     * Writes one state: its size, its key count, and then every key
     * (a token sequence) together with the counter stored under it.
     */
    void encodeState(Range)(State!T state, ref Range output)
    if(isOutputRange!(Range, ubyte))
    {
        encodeValue!(uint)(cast(uint) state.size, output);
        encodeValue!(uint)(cast(uint) state.length, output);

        foreach(key; state.keys)
        {
            encodeTokens(key, output);
            encodeCounter(state.get(key), output);
        }
    }

    /**
     * Writes one counter: its entry count, then each token followed by
     * the `uint` value stored for that token.
     */
    void encodeCounter(Range)(Counter!T counter, ref Range output)
    if(isOutputRange!(Range, ubyte))
    {
        encodeValue!(uint)(cast(uint) counter.length, output);

        foreach(key; counter.keys)
        {
            encodeToken(key, output);
            encodeValue!(uint)(counter.get(key), output);
        }
    }

private:
    /// Strings: a `uint` length prefix followed by the string itself.
    void encodeValue(Type, Range)(Type value, ref Range output)
    if(isOutputRange!(Range, ubyte) && isSomeString!Type)
    {
        // TODO : Handle wide strings.
        encodeValue!(uint)(cast(uint) value.length, output);
        put(output, value);
    }

    /// Non-string arrays: a `uint` length prefix followed by each element.
    void encodeValue(Type, Range)(Type value, ref Range output)
    if(isOutputRange!(Range, ubyte) && isArray!Type && !isSomeString!Type)
    {
        encodeValue!(uint)(cast(uint) value.length, output);

        foreach(element; value)
        {
            encodeValue!(ForeachType!Type)(element, output);
        }
    }

    /// Associative arrays: a `uint` entry count followed by key/value pairs.
    void encodeValue(Type, Range)(Type value, ref Range output)
    if(isOutputRange!(Range, ubyte) && isAssociativeArray!Type)
    {
        // `length` is a size_t; narrow it explicitly like every other length
        // written by this encoder, otherwise this overload does not compile
        // on targets where size_t is wider than uint.
        encodeValue!(uint)(cast(uint) value.length, output);

        foreach(key, element; value)
        {
            encodeValue!(KeyType!Type)(key, output);
            encodeValue!(ValueType!Type)(element, output);
        }
    }

    /// Numeric values: the value's bytes in big-endian order.
    void encodeValue(Type, Range)(Type value, ref Range output)
    if(isOutputRange!(Range, ubyte) && isNumeric!Type)
    {
        put(output, value.nativeToBigEndian[]);
    }

    /// Booleans: a single byte, 1 for true and 0 for false.
    void encodeValue(Type, Range)(Type value, ref Range output)
    if(isOutputRange!(Range, ubyte) && isBoolean!Type)
    {
        put(output, cast(ubyte)(value ? 1 : 0));
    }

    /**
     * Writes a single token. When `hasEncodeProperty!(T, ubyte[])` holds,
     * the result of `token.encode` is written as-is; otherwise the token
     * is written through the matching `encodeValue` overload.
     */
    void encodeToken(Range)(T token, ref Range output)
    if(isOutputRange!(Range, ubyte))
    {
        static if(hasEncodeProperty!(T, ubyte[]))
        {
            put(output, token.encode);
        }
        else
        {
            encodeValue!(T)(token, output);
        }
    }

    /// Writes a token sequence: a `uint` count followed by each token.
    void encodeTokens(Range)(T[] tokens, ref Range output)
    if(isOutputRange!(Range, ubyte))
    {
        encodeValue!(uint)(cast(uint) tokens.length, output);

        foreach(token; tokens)
        {
            encodeToken(token, output);
        }
    }
}
127 
/**
 * Convenience wrapper: encodes `chain` with a default `BinaryEncoder!T`.
 *
 * Returns: the bytes produced by `BinaryEncoder!T.encode`.
 */
ubyte[] encodeBinary(T)(ref MarkovChain!T chain)
{
    auto binaryEncoder = BinaryEncoder!T.init;
    auto bytes = binaryEncoder.encode(chain);

    return bytes;
}
133 
/**
 * Convenience wrapper: encodes `chain` into `output` with a default
 * `BinaryEncoder!T`.
 *
 * Params:
 *   chain  = the chain to encode.
 *   output = any output range accepting `ubyte`.
 */
void encodeBinary(T, Range)(ref MarkovChain!T chain, ref Range output)
if(isOutputRange!(Range, ubyte))
{
    auto binaryEncoder = BinaryEncoder!T.init;

    binaryEncoder.encode(chain, output);
}
140 
/**
 * Encodes `chain` with a default `BinaryEncoder!T` and writes the result
 * to `output` using `File.rawWrite`.
 *
 * Params:
 *   chain  = the chain to encode.
 *   output = the file that receives the encoded bytes.
 */
void encodeBinary(T)(ref MarkovChain!T chain, File output)
{
    // Adapter that lets the encoder treat a File as an output range of
    // ubyte: it is callable with either a single byte or a byte slice.
    static struct FileOutputRange
    {
        File _file;

        this(File file)
        {
            _file = file;
        }

        void opCall(ubyte b)
        {
            // Write from a one-element stack array rather than passing the
            // array literal `[b]` directly, which would create a new
            // dynamic array for every single byte written.
            ubyte[1] single = [b];
            _file.rawWrite(single[]);
        }

        void opCall(ubyte[] b)
        {
            _file.rawWrite(b);
        }
    }

    BinaryEncoder!T encoder;
    auto range = FileOutputRange(output);

    encoder.encode(chain, range);
}
168 
// Round-trip test: a trained chain is encoded to binary (both in memory and
// through a file), decoded again, and the in-memory result is compared
// state by state against the original.
unittest
{
    import markov.binary.decoder;
    static import std.file;

    auto chain1 = MarkovChain!string(1, 2, 3);
    chain1.train("a", "b", "c", "e", "b", "a", "b", "a", "c", "e", "d", "c", "b", "a");

    import markov.json.encoder;

    // Do not leave the scratch file behind, whether the test passes or not.
    scope(exit) if(std.file.exists("test")) std.file.remove("test");

    chain1.encodeBinary(File("test", "wb"));
    auto chain3 = decodeBinary!string(File("test", "rb"));
    auto chain2 = chain1.encodeBinary.decodeBinary!string;

    assert(chain1.sizes.length == chain2.sizes.length);
    // The file-based round trip must at least yield the same number of sizes.
    assert(chain1.sizes.length == chain3.sizes.length);

    foreach(state1, state2; chain1.states.sort!"a.size > b.size".lockstep(chain2.states.sort!"a.size > b.size"))
    {
        assert(state1.size == state2.size);
        assert(state1.keys.length == state2.keys.length);

        // Compare the original keys against the DECODED keys; pairing
        // state1.keys with itself would make the equality check vacuous.
        foreach(first1, first2; sort(state1.keys).lockstep(sort(state2.keys)))
        {
            assert(first1 == first2);
            auto counters1 = state1.get(first1);
            auto counters2 = state2.get(first2);

            assert(counters1.total == counters2.total);
            assert(sort(counters1.keys) == sort(counters2.keys));

            foreach(follow1, follow2; sort(counters1.keys).lockstep(sort(counters2.keys)))
            {
                assert(follow1 == follow2);
                assert(counters1.get(follow1) == counters2.get(follow2));
            }
        }
    }
}