1 /* 2 * This code is derivative of guess.c of Gauche-0.8.3. 3 * The following is the original copyright notice. 4 */ 5 6 /* 7 * Copyright (c) 2000-2003 Shiro Kawai, All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * 3. Neither the name of the authors nor the names of its contributors 21 * may be used to endorse or promote products derived from this 22 * software without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 25 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 26 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 27 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 28 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 29 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 30 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 31 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 32 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 33 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 35 * 36 */ 37 /** 38 * 39 * 40 * License: BSD 3-clause 41 */ 42 module libguess_d.dfa; 43 44 45 private static import libguess_d.encoding; 46 47 /** 48 * data types 49 */ 50 package struct guess_arc 51 { 52 public: 53 /** 54 * next state 55 */ 56 byte next; 57 58 /** 59 * score 60 */ 61 double score; 62 63 invariant 64 { 65 assert(this.next >= 0); 66 } 67 } 68 69 /** 70 * 71 */ 72 package struct guess_dfa 73 { 74 static import libguess_d.encoding; 75 76 private: 77 /** 78 * 79 */ 80 immutable byte[][256]* states; 81 82 /** 83 * 84 */ 85 immutable .guess_arc[]* arcs; 86 87 /** 88 * 89 */ 90 byte state = 0; 91 92 /** 93 * 94 */ 95 immutable libguess_d.encoding.libguess_encoding name; 96 97 public: 98 /** 99 * 100 */ 101 double score = 1.0; 102 103 /** 104 * init data 105 * 106 * Params: 107 * st = guess_*_st pointer 108 * ar = guess_*_ar pointer 109 */ 110 pragma(inline, true) 111 pure nothrow @safe @nogc 112 this(immutable byte[][256]* st, immutable .guess_arc[]* ar, const libguess_d.encoding.libguess_encoding name) 113 114 do 115 { 116 this.states = st; 117 this.arcs = ar; 118 this.name = name; 119 } 120 121 /** 122 * check whether this.state is available 123 */ 124 pragma(inline, true) 125 pure nothrow @safe @nogc 126 bool is_alive() const 127 128 do 129 { 130 return this.state >= 0; 131 } 132 133 /** 134 * 135 * 136 * Params: 137 * ch = 138 */ 139 pure nothrow @safe @nogc 140 bool is_alone(S)(const ref S order) const 141 { 142 if (this.state < 0) { 143 return false; 144 } 145 146 for (size_t i = 0; i < order.length; i++) { 147 if (order[i].is_alive()) { 148 return false; 149 } 150 } 151 152 return true; 153 } 154 155 /** 156 * 157 * 158 * Params: 159 * ch = character to keep the score 160 */ 161 pure nothrow @trusted @nogc 162 void next(const ubyte ch) 163 164 do 165 { 166 if (this.is_alive()) { 167 immutable byte arc__ = (*this.states)[this.state][ch]; 168 169 if (arc__ < 0) { 170 this.state = -1; 171 } else { 172 this.state = (*this.arcs)[cast(size_t)(arc__)].next; 173 this.score *= (*this.arcs)[cast(size_t)(arc__)].score; 174 } 175 } 176 } 177 178 invariant 179 { 180 } 181 } 182 183 184 /** 185 * 186 * 187 * Params: 188 * order = 189 */ 190 pure nothrow @safe @nogc 191 bool dfa_none(S)(const ref S order) 192 193 do 194 { 195 for (size_t i = 0; i < order.length; i++) { 196 if (order[i].is_alive()) { 197 return false; 198 } 199 } 200 201 return true; 202 } 203 204 /** 205 * 206 * 207 * Params: 208 * order = 209 */ 210 pure nothrow @safe @nogc 211 libguess_d.encoding.libguess_encoding dfa_top(S)(const ref S order) 212 213 do 214 { 215 static import libguess_d.encoding; 216 217 const (guess_dfa)* top = null; 218 219 for (size_t i = 0; i < order.length; i++) { 220 if (order[i].is_alive()) { 221 if ((top == null) || (order[i].score > top.score)) { 222 top = &order[i]; 223 } 224 } 225 } 226 227 if (top != null) { 228 return (*top).name; 229 } else { 230 return libguess_d.encoding.libguess_encoding.undefined; 231 } 232 } 233 234 /** 235 * 236 * 237 * Params: 238 * input = 239 */ 240 pure nothrow @safe @nogc 241 libguess_d.encoding.libguess_encoding check_UTF16_BOM(const char[] input) 242 243 do 244 { 245 static import libguess_d.encoding; 246 247 /* special treatment of BOM */ 248 if (input.length > 1) { 249 if ((input[0] == 0xFF) && (input[1] == 0xFE)) { 250 return libguess_d.encoding.libguess_encoding.UCS_2LE; 251 } else if ((input[0] == 0xFE) && (input[1] == 0xFF)) { 252 return libguess_d.encoding.libguess_encoding.UCS_2BE; 253 } 254 } 255 256 return libguess_d.encoding.libguess_encoding.undefined; 257 } 258 259 /** 260 * 261 * 262 * Params: 263 * order = 264 * c = 265 */ 266 pure nothrow @trusted @nogc 267 libguess_d.encoding.libguess_encoding dfa_process(S)(ref S order, const ubyte c) 268 269 do 270 { 271 static import libguess_d.encoding; 272 273 for (size_t i = 0; i < order.length; i++) { 274 if (order[i].is_alive()) { 275 if (order[i].is_alone(order)) { 276 return order[i].name; 277 } 278 279 order[i].next(c); 280 } 281 } 282 283 return libguess_d.encoding.libguess_encoding.undefined; 284 }