1 /*
2  * This code is derivative of guess.c of Gauche-0.8.3.
3  * The following is the original copyright notice.
4  */
5 
6 /*
7  *   Copyright (c) 2000-2003 Shiro Kawai, All rights reserved.
8  *
9  *   Redistribution and use in source and binary forms, with or without
10  *   modification, are permitted provided that the following conditions
11  *   are met:
12  *
13  *   1. Redistributions of source code must retain the above copyright
14  *      notice, this list of conditions and the following disclaimer.
15  *
16  *   2. Redistributions in binary form must reproduce the above copyright
17  *      notice, this list of conditions and the following disclaimer in the
18  *      documentation and/or other materials provided with the distribution.
19  *
20  *   3. Neither the name of the authors nor the names of its contributors
21  *      may be used to endorse or promote products derived from this
22  *      software without specific prior written permission.
23  *
24  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
28  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
30  *   TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
31  *   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
32  *   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
33  *   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34  *   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35  *
36  */
37 /**
38  * 
39  *
40  * License: BSD 3-clause
41  */
42 module libguess_d.dfa;
43 
44 
45 private static import libguess_d.encoding;
46 
47 /**
48  * data types
49  */
50 package struct guess_arc
51 {
52 public:
53 	/**
54 	 * next state
55 	 */
56 	byte next;
57 
58 	/**
59 	 * score
60 	 */
61 	double score;
62 
63 	invariant
64 	{
65 		assert(this.next >= 0);
66 	}
67 }
68 
69 /**
70  *
71  */
72 package struct guess_dfa
73 {
74 	static import libguess_d.encoding;
75 
76 private:
77 	/**
78 	 *
79 	 */
80 	immutable byte[][256]* states;
81 
82 	/**
83 	 *
84 	 */
85 	immutable .guess_arc[]* arcs;
86 
87 	/**
88 	 *
89 	 */
90 	byte state = 0;
91 
92 	/**
93 	 *
94 	 */
95 	immutable libguess_d.encoding.libguess_encoding name;
96 
97 public:
98 	/**
99 	 *
100 	 */
101 	double score = 1.0;
102 
103 	/**
104 	 * init data
105 	 *
106 	 * Params:
107 	 *      st = guess_*_st pointer
108 	 *      ar = guess_*_ar pointer
109 	 */
110 	pragma(inline, true)
111 	pure nothrow @safe @nogc
112 	this(immutable byte[][256]* st, immutable .guess_arc[]* ar, const libguess_d.encoding.libguess_encoding name)
113 
114 		do
115 		{
116 			this.states = st;
117 			this.arcs = ar;
118 			this.name = name;
119 		}
120 
121 	/**
122 	 * check whether this.state is available
123 	 */
124 	pragma(inline, true)
125 	pure nothrow @safe @nogc
126 	bool is_alive() const
127 
128 		do
129 		{
130 			return this.state >= 0;
131 		}
132 
133 	/**
134 	 * 
135 	 *
136 	 * Params:
137 	 *      ch = 
138 	 */
139 	pure nothrow @safe @nogc
140 	bool is_alone(S)(const ref S order) const
141 	{
142 		if (this.state < 0) {
143 			return false;
144 		}
145 
146 		for (size_t i = 0; i < order.length; i++) {
147 			if (order[i].is_alive()) {
148 				return false;
149 			}
150 		}
151 
152 		return true;
153 	}
154 
155 	/**
156 	 * 
157 	 *
158 	 * Params:
159 	 *      ch = character to keep the score
160 	 */
161 	pure nothrow @trusted @nogc
162 	void next(const ubyte ch)
163 
164 		do
165 		{
166 			if (this.is_alive()) {
167 				immutable byte arc__ = (*this.states)[this.state][ch];
168 
169 				if (arc__ < 0) {
170 					this.state = -1;
171 				} else {
172 					this.state = (*this.arcs)[cast(size_t)(arc__)].next;
173 					this.score *= (*this.arcs)[cast(size_t)(arc__)].score;
174 				}
175 			}
176 		}
177 
178 	invariant
179 	{
180 	}
181 }
182 
183 
184 /**
185  * 
186  *
187  * Params:
188  *      order = 
189  */
190 pure nothrow @safe @nogc
191 bool dfa_none(S)(const ref S order)
192 
193 	do
194 	{
195 		for (size_t i = 0; i < order.length; i++) {
196 			if (order[i].is_alive()) {
197 				return false;
198 			}
199 		}
200 
201 		return true;
202 	}
203 
204 /**
205  * 
206  *
207  * Params:
208  *      order = 
209  */
210 pure nothrow @safe @nogc
211 libguess_d.encoding.libguess_encoding dfa_top(S)(const ref S order)
212 
213 	do
214 	{
215 		static import libguess_d.encoding;
216 
217 		const (guess_dfa)* top = null;
218 
219 		for (size_t i = 0; i < order.length; i++) {
220 			if (order[i].is_alive()) {
221 				if ((top == null) || (order[i].score > top.score)) {
222 					top = &order[i];
223 				}
224 			}
225 		}
226 
227 		if (top != null) {
228 			return (*top).name;
229 		} else {
230 			return libguess_d.encoding.libguess_encoding.undefined;
231 		}
232 	}
233 
234 /**
235  * 
236  *
237  * Params:
238  *      input = 
239  */
240 pure nothrow @safe @nogc
241 libguess_d.encoding.libguess_encoding check_UTF16_BOM(const char[] input)
242 
243 	do
244 	{
245 		static import libguess_d.encoding;
246 
247 		/* special treatment of BOM */
248 		if (input.length > 1) {
249 			if ((input[0] == 0xFF) && (input[1] == 0xFE)) {
250 				return libguess_d.encoding.libguess_encoding.UCS_2LE;
251 			} else if ((input[0] == 0xFE) && (input[1] == 0xFF)) {
252 				return libguess_d.encoding.libguess_encoding.UCS_2BE;
253 			}
254 		}
255 
256 		return libguess_d.encoding.libguess_encoding.undefined;
257 	}
258 
259 /**
260  * 
261  *
262  * Params:
263  *      order = 
264  *      c = 
265  */
266 pure nothrow @trusted @nogc
267 libguess_d.encoding.libguess_encoding dfa_process(S)(ref S order, const ubyte c)
268 
269 	do
270 	{
271 		static import libguess_d.encoding;
272 
273 		for (size_t i = 0; i < order.length; i++) {
274 			if (order[i].is_alive()) {
275 				if (order[i].is_alone(order)) {
276 					return order[i].name;
277 				}
278 
279 				order[i].next(c);
280 			}
281 		}
282 
283 		return libguess_d.encoding.libguess_encoding.undefined;
284 	}