summaryrefslogtreecommitdiff
path: root/thirdparty/lws/minilex.c
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/lws/minilex.c')
-rw-r--r--thirdparty/lws/minilex.c272
1 files changed, 272 insertions, 0 deletions
diff --git a/thirdparty/lws/minilex.c b/thirdparty/lws/minilex.c
new file mode 100644
index 0000000000..3cb1e33696
--- /dev/null
+++ b/thirdparty/lws/minilex.c
@@ -0,0 +1,272 @@
+/*
+ * minilex.c
+ *
+ * High efficiency lexical state parser
+ *
+ * Copyright (C)2011-2014 Andy Green <andy@warmcat.com>
+ *
+ * Licensed under LGPL2
+ *
+ * Usage: gcc minilex.c -o minilex && ./minilex > lextable.h
+ *
+ * Run it twice to test parsing on the generated table on stderr
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "lextable-strings.h"
+
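+/*
+ * Table encoding, selected by bit 7 of the first byte of each entry:
+ *
+ * b7 = 0:
+ *   0x08         1-byte seq: fail marker
+ *   0x00 - 0x07  2-byte seq: terminal, index is (1st byte << 8) + 2nd byte
+ *   otherwise    3-byte seq: character to match, then a 16-bit forward
+ *                jump (low byte at +1, high byte at +2)
+ *                no match: go fwd 3 bytes, match: jump fwd by that amount
+ * b7 = 1:
+ *   1-byte seq:  low 7 bits are the character to match
+ *                no match: die, match: go fwd 1 byte
+ */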
+
+unsigned char lextable[] = { /* table from an earlier run, walked by lextable_decode() in the self-test */
+ #include "lextable.h"
+};
+
+#define PARALLEL 30 /* capacity of the c[] / state[] arrays in each struct state */
+
+struct state { /* one node of the trie that main() builds from the strings in set[] */
+ char c[PARALLEL]; /* input character of each transition; on a terminal node c[0] is the index into set[] */
+ int state[PARALLEL]; /* index in the global state[] reached by each transition; 0 marks a terminal */
+ int count; /* number of entries in use in c[] and state[] */
+ int bytepos; /* running total of 2 * count over all earlier nodes; used to spot "target is the very next node" */
+
+ int real_pos; /* byte offset of this node's first entry in the emitted table */
+};
+
+struct state state[1000]; /* node pool; state[0] is the root every string starts from */
+int next = 1; /* index of the next unused entry in state[] */
+
+#define FAIL_CHAR 0x08 /* byte emitted after the last 3-byte entry of a node: no alternative matched */
+
+/*
+ * lextable_decode() - advance one step through lextable[]
+ *
+ * pos: offset in lextable[] of the first entry of the current state
+ * c:   input character to match against the entries of that state
+ *
+ * Returns the offset of the state reached by consuming c.  If pos is
+ * already on a terminal marker (first byte < FAIL_CHAR), pos is returned
+ * unchanged.  Returns -1 when no entry of the current state matches c.
+ */
+int lextable_decode(int pos, char c)
+{
+	while (1) {
+		if (lextable[pos] & (1 << 7)) {
+			/* 1-byte entry: fail on mismatch, else step 1 byte */
+			if ((lextable[pos] & 0x7f) != c)
+				return -1;
+
+			pos++;
+			if (lextable[pos] == FAIL_CHAR)
+				return -1;
+
+			return pos;
+		}
+
+		/* b7 = 0: fail marker, terminal marker or 3-byte entry */
+
+		/*
+		 * The generator closes a run of 3-byte entries with a single
+		 * FAIL_CHAR byte.  It has to be recognised here: treating it
+		 * as one more 3-byte entry would skip 3 bytes and continue
+		 * decoding in the middle of whatever follows.
+		 */
+		if (lextable[pos] == FAIL_CHAR)
+			return -1;
+
+		if (lextable[pos] < FAIL_CHAR) /* terminal marker */
+			return pos;
+
+		if (lextable[pos] == c) /* goto: 16-bit forward jump */
+			return pos + (lextable[pos + 1]) +
+					(lextable[pos + 2] << 8);
+
+		/* not this alternative, try the next 3-byte entry */
+		pos += 3;
+	}
+}
+
+/*
+ * Build a trie from the strings in set[], write the encoded state table
+ * to stdout, then use lextable_decode() on the compiled-in lextable[] to
+ * check that every string in set[] decodes to its own index (progress and
+ * errors go to stderr).
+ *
+ * Exit status: 0 all OK, 1 jump does not fit in 16 bits, 2 terminal index
+ * too big, 3 a string failed to parse, 4 a string decoded to the wrong
+ * index, 5 the string set does not fit in the fixed-size state storage.
+ */
+int main(void)
+{
+	int n = 0;
+	int m = 0;
+	int prev;
+	int walk;
+	int saw;
+	int y;
+	int j;
+	int pos = 0;
+
+	/* pass 1: build the trie, one node per distinct prefix */
+
+	while (n < sizeof(set) / sizeof(set[0])) {
+
+		m = 0;
+		walk = 0;
+
+		if (set[n][0] == '\0') {
+			n++;
+			continue;
+		}
+
+		while (set[n][m]) {
+
+			saw = 0;
+			for (y = 0; y < state[walk].count; y++)
+				if (state[walk].c[y] == set[n][m]) {
+					/* exists -- go forward */
+					walk = state[walk].state[y];
+					saw = 1;
+					break;
+				}
+
+			if (saw)
+				goto again;
+
+			/* something we didn't see before */
+
+			/* refuse to write past the per-node arrays */
+			if (state[walk].count >= PARALLEL) {
+				fprintf(stderr,
+					"more than %d transitions from one state\n",
+					PARALLEL);
+				return 5;
+			}
+
+			/* refuse to allocate past the end of state[] */
+			if (next >= (int)(sizeof(state) / sizeof(state[0]))) {
+				fprintf(stderr, "more than %d states needed\n",
+					(int)(sizeof(state) / sizeof(state[0])));
+				return 5;
+			}
+
+			state[walk].c[state[walk].count] = set[n][m];
+
+			state[walk].state[state[walk].count] = next;
+			state[walk].count++;
+			walk = next++;
+again:
+			m++;
+		}
+
+		/*
+		 * End of string: turn the node into a terminal carrying the
+		 * string's index.  This overwrites any transitions the node
+		 * already held.
+		 */
+		state[walk].c[0] = n++;
+		state[walk].state[0] = 0; /* terminal marker */
+		state[walk].count = 1;
+	}
+
+	/* pass 2: nominal position of each node, counting 2 per entry */
+
+	walk = 0;
+	for (n = 0; n < next; n++) {
+		state[n].bytepos = walk;
+		walk += (2 * state[n].count);
+	}
+
+	/* compute everyone's position first */
+
+	pos = 0;
+	walk = 0;
+	for (n = 0; n < next; n++) {
+
+		state[n].real_pos = pos;
+
+		for (m = 0; m < state[n].count; m++) {
+
+			if (state[n].state[m] == 0)
+				pos += 2; /* terminal marker */
+			else { /* c is a character */
+				if ((state[state[n].state[m]].bytepos -
+								walk) == 2)
+					pos++;
+				else {
+					pos += 3;
+					if (m == state[n].count - 1)
+						pos++; /* fail */
+				}
+			}
+			walk += 2;
+		}
+	}
+
+	/* pass 3: emit the table, same sizing rules as the pass above */
+
+	walk = 0;
+	pos = 0;
+	for (n = 0; n < next; n++) {
+		for (m = 0; m < state[n].count; m++) {
+
+			if (!m)
+				fprintf(stdout, "/* pos %04x: %3d */ ",
+					state[n].real_pos, n);
+			else
+				fprintf(stdout, " ");
+
+			y = state[n].c[m];
+			saw = state[n].state[m];
+
+			if (saw == 0) { // c is a terminal then
+
+				if (y > 0x7ff) {
+					fprintf(stderr, "terminal too big\n");
+					return 2;
+				}
+
+				fprintf(stdout, " 0x%02X, 0x%02X "
+					" "
+					"/* - terminal marker %2d - */,\n",
+					y >> 8, y & 0xff, y & 0x7f);
+				pos += 2;
+				walk += 2;
+				continue;
+			}
+
+			/* c is a character */
+
+			/* printable form of the character for the comment */
+			prev = y &0x7f;
+			if (prev < 32 || prev > 126)
+				prev = '.';
+
+
+			/* target is the very next node: 1-byte entry, b7 set */
+			if ((state[saw].bytepos - walk) == 2) {
+				fprintf(stdout, " 0x%02X /* '%c' -> */,\n",
+					y | 0x80, prev);
+				pos++;
+				walk += 2;
+				continue;
+			}
+
+			/* otherwise a 3-byte entry with a 16-bit forward jump */
+			j = state[saw].real_pos - pos;
+
+			if (j > 0xffff) {
+				fprintf(stderr,
+					"Jump > 64K bytes ahead (%d to %d)\n",
+					state[n].real_pos, state[saw].real_pos);
+				return 1;
+			}
+			fprintf(stdout, " 0x%02X /* '%c' */, 0x%02X, 0x%02X "
+				"/* (to 0x%04X state %3d) */,\n",
+				y, prev,
+				j & 0xff, j >> 8,
+				state[saw].real_pos, saw);
+			pos += 3;
+
+			if (m == state[n].count - 1) {
+				fprintf(stdout,
+					" 0x%02X, /* fail */\n",
+					FAIL_CHAR);
+				pos++; /* fail */
+			}
+
+			walk += 2;
+		}
+	}
+
+	fprintf(stdout, "/* total size %d bytes */\n", pos);
+
+	/*
+	 * Try to parse every legal input string
+	 */
+
+	for (n = 0; n < sizeof(set) / sizeof(set[0]); n++) {
+		walk = 0;
+		m = 0;
+		y = -1;
+
+		if (set[n][0] == '\0')
+			continue;
+
+		fprintf(stderr, " trying '%s'\n", set[n]);
+
+		while (set[n][m]) {
+			walk = lextable_decode(walk, set[n][m]);
+			if (walk < 0) {
+				fprintf(stderr, "failed\n");
+				return 3;
+			}
+
+			/* reached a terminal: recover the 16-bit index */
+			if (lextable[walk] < FAIL_CHAR) {
+				y = (lextable[walk] << 8) + lextable[walk + 1];
+				break;
+			}
+			m++;
+		}
+
+		if (y != n) {
+			fprintf(stderr, "decode failed %d\n", y);
+			return 4;
+		}
+	}
+
+	fprintf(stderr, "All decode OK\n");
+
+	return 0;
+}