module markov.binary.encoder;

import markov.chain;
import markov.counter;
import markov.serialize;
import markov.state;

import std.algorithm;
import std.array;
import std.bitmanip;
import std.conv;
import std.outbuffer;
import std.range;
import std.stdio;
import std.traits;

/**
 * Writes a `MarkovChain!T` to a byte-oriented output range.
 *
 * Layout produced by the visible code:
 * ---
 * uint  number of states
 * per state:
 *     uint  state.size
 *     uint  state.length
 *     per key in state.keys:
 *         uint  number of tokens in the key, followed by each token
 *         uint  counter.length
 *         per key in counter.keys:
 *             token
 *             uint  counter.get(key)
 * ---
 * All integers are written big-endian. Every length and count is truncated
 * to 32 bits.
 *
 * Params:
 *     T = Token type of the chain; must satisfy `isEncodable!(T, ubyte[])`.
 */
struct BinaryEncoder(T)
if(isEncodable!(T, ubyte[]))
{
public:
    /**
     * Encodes a chain into a freshly allocated byte array.
     *
     * Params:
     *     chain = The chain to encode.
     *
     * Returns: The encoded bytes.
     */
    ubyte[] encode()(ref MarkovChain!T chain)
    {
        auto buffer = new OutBuffer;
        encode(chain, buffer);
        return buffer.toBytes;
    }

    /**
     * Encodes a chain into an output range of bytes.
     *
     * Params:
     *     chain  = The chain to encode.
     *     output = Destination output range accepting `ubyte`.
     */
    void encode(Range)(ref MarkovChain!T chain, ref Range output)
    if(isOutputRange!(Range, ubyte))
    {
        encodeStates(chain.states, output);
    }

    /**
     * Writes the number of states followed by every state in order.
     *
     * Params:
     *     states = The states to encode.
     *     output = Destination output range accepting `ubyte`.
     */
    void encodeStates(Range)(State!T[] states, ref Range output)
    if(isOutputRange!(Range, ubyte))
    {
        encodeValue!(uint)(cast(uint) states.length, output);

        foreach(state; states)
        {
            encodeState(state, output);
        }
    }

    /**
     * Writes one state: its `size`, its `length`, then each key's tokens
     * followed by the counter stored under that key.
     *
     * Params:
     *     state  = The state to encode.
     *     output = Destination output range accepting `ubyte`.
     */
    void encodeState(Range)(State!T state, ref Range output)
    if(isOutputRange!(Range, ubyte))
    {
        encodeValue!(uint)(cast(uint) state.size, output);
        encodeValue!(uint)(cast(uint) state.length, output);

        foreach(key; state.keys)
        {
            encodeTokens(key, output);
            encodeCounter(state.get(key), output);
        }
    }

    /**
     * Writes one counter: its `length`, then each key token followed by the
     * value `counter.get` returns for it, written as a `uint`.
     *
     * Params:
     *     counter = The counter to encode.
     *     output  = Destination output range accepting `ubyte`.
     */
    void encodeCounter(Range)(Counter!T counter, ref Range output)
    if(isOutputRange!(Range, ubyte))
    {
        encodeValue!(uint)(cast(uint) counter.length, output);

        foreach(key; counter.keys)
        {
            encodeToken(key, output);
            encodeValue!(uint)(counter.get(key), output);
        }
    }

private:
    /// Strings: a `uint` length (in code units) followed by the string itself.
    void encodeValue(Type, Range)(Type value, ref Range output)
    if(isOutputRange!(Range, ubyte) && isSomeString!Type)
    {
        // TODO : Handle wide strings.
        encodeValue!(uint)(cast(uint) value.length, output);
        put(output, value);
    }

    /// Non-string arrays: a `uint` length followed by every element.
    void encodeValue(Type, Range)(Type value, ref Range output)
    if(isOutputRange!(Range, ubyte) && isArray!Type && !isSomeString!Type)
    {
        encodeValue!(uint)(cast(uint) value.length, output);

        foreach(element; value)
        {
            encodeValue!(ForeachType!Type)(element, output);
        }
    }

    /// Associative arrays: a `uint` length followed by each key/value pair.
    void encodeValue(Type, Range)(Type value, ref Range output)
    if(isOutputRange!(Range, ubyte) && isAssociativeArray!Type)
    {
        // The explicit cast is required: `length` is a size_t, which does not
        // implicitly narrow to uint on 64-bit targets.
        encodeValue!(uint)(cast(uint) value.length, output);

        foreach(key, element; value)
        {
            encodeValue!(KeyType!Type)(key, output);
            encodeValue!(ValueType!Type)(element, output);
        }
    }

    /// Numeric values: the big-endian byte representation of the value.
    void encodeValue(Type, Range)(Type value, ref Range output)
    if(isOutputRange!(Range, ubyte) && isNumeric!Type)
    {
        // Keep the static array in a local so the slice never refers to a
        // temporary.
        auto bytes = value.nativeToBigEndian;
        put(output, bytes[]);
    }

    /// Booleans: a single byte, 1 for true and 0 for false.
    void encodeValue(Type, Range)(Type value, ref Range output)
    if(isOutputRange!(Range, ubyte) && isBoolean!Type)
    {
        put(output, cast(ubyte)(value ? 1 : 0));
    }

    /**
     * Writes a single token, using the token's own `encode` property when
     * `hasEncodeProperty!(T, ubyte[])` holds and the generic value encoding
     * otherwise.
     */
    void encodeToken(Range)(T token, ref Range output)
    if(isOutputRange!(Range, ubyte))
    {
        static if(hasEncodeProperty!(T, ubyte[]))
        {
            put(output, token.encode);
        }
        else
        {
            encodeValue!(T)(token, output);
        }
    }

    /// Writes a `uint` token count followed by every token.
    void encodeTokens(Range)(T[] tokens, ref Range output)
    if(isOutputRange!(Range, ubyte))
    {
        encodeValue!(uint)(cast(uint) tokens.length, output);

        foreach(token; tokens)
        {
            encodeToken(token, output);
        }
    }
}

/**
 * Encodes a chain into a byte array.
 *
 * Params:
 *     chain = The chain to encode.
 *
 * Returns: The encoded bytes.
 */
ubyte[] encodeBinary(T)(ref MarkovChain!T chain)
{
    BinaryEncoder!T encoder;
    return encoder.encode(chain);
}

/**
 * Encodes a chain into an output range of bytes.
 *
 * Params:
 *     chain  = The chain to encode.
 *     output = Destination output range accepting `ubyte`.
 */
void encodeBinary(T, Range)(ref MarkovChain!T chain, ref Range output)
if(isOutputRange!(Range, ubyte))
{
    BinaryEncoder!T encoder;
    encoder.encode(chain, output);
}

/**
 * Encodes a chain and writes the bytes to a file.
 *
 * Params:
 *     chain  = The chain to encode.
 *     output = The file to write to.
 */
void encodeBinary(T)(ref MarkovChain!T chain, File output)
{
    // Adapts a File to the output-range interface through opCall.
    static struct FileOutputRange
    {
        File _file;

        this(File file)
        {
            _file = file;
        }

        void opCall(ubyte b)
        {
            // A stack buffer avoids a heap allocation for every byte written.
            ubyte[1] single = [b];
            _file.rawWrite(single[]);
        }

        void opCall(ubyte[] b)
        {
            _file.rawWrite(b);
        }
    }

    BinaryEncoder!T encoder;
    auto range = FileOutputRange(output);

    encoder.encode(chain, range);
}

unittest
{
    import markov.binary.decoder;
    import std.file : exists, removeFile = remove;

    // Asserts that two chains hold the same states, keys and counters.
    void assertSameChain(A, B)(ref A expected, ref B actual)
    {
        assert(expected.sizes.length == actual.sizes.length);

        foreach(state1, state2; expected.states.sort!"a.size > b.size".lockstep(actual.states.sort!"a.size > b.size"))
        {
            assert(state1.size == state2.size);
            assert(state1.keys.length == state2.keys.length);

            // Compare the expected keys against the *decoded* keys.
            foreach(first1, first2; sort(state1.keys).lockstep(sort(state2.keys)))
            {
                assert(first1 == first2);
                auto counters1 = state1.get(first1);
                auto counters2 = state2.get(first2);

                assert(counters1.total == counters2.total);
                assert(sort(counters1.keys) == sort(counters2.keys));

                foreach(follow1, follow2; sort(counters1.keys).lockstep(sort(counters2.keys)))
                {
                    assert(follow1 == follow2);
                    assert(counters1.get(follow1) == counters2.get(follow2));
                }
            }
        }
    }

    // Do not leave the scratch file behind, whether the test passes or fails.
    scope(exit)
    {
        if(exists("test"))
        {
            removeFile("test");
        }
    }

    auto chain1 = MarkovChain!string(1, 2, 3);
    chain1.train("a", "b", "c", "e", "b", "a", "b", "a", "c", "e", "d", "c", "b", "a");

    // Round trip through a file.
    chain1.encodeBinary(File("test", "wb"));
    auto chain3 = decodeBinary!string(File("test", "rb"));

    // Round trip through an in-memory byte array.
    auto chain2 = chain1.encodeBinary.decodeBinary!string;

    assertSameChain(chain1, chain2);
    assertSameChain(chain1, chain3);
}