aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/tokenize.c
diff options
authorAl Viro <viro@ftp.linux.org.uk>2006-10-01 19:55:02 +0100
committerJosh Triplett <josh@freedesktop.org>2006-10-24 05:23:24 -0700
commit758c8a83cebba4e7e762792719d26246a8cf8ac2 (patch)
tree260f05cece59def5d65063abaac3628048b154ea /tokenize.c
parent70ada4fad4c4a9f59862677dc7f905482c4f4e99 (diff)
downloadsparse-dev-758c8a83cebba4e7e762792719d26246a8cf8ac2.tar.gz
switch to hash-based get_one_special()
Weird, but true: the set of C two-character punctuators and two-symbol prefixes of three-character punctuators is distinguishable by 5-bit hash function (27 out of 32). Application is obvious - we get much faster get_one_special() out of that... Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Josh Triplett <josh@freedesktop.org>
Diffstat (limited to 'tokenize.c')
-rw-r--r--tokenize.c78
1 files changed, 66 insertions, 12 deletions
diff --git a/tokenize.c b/tokenize.c
index 497da13c..67f5a67a 100644
--- a/tokenize.c
+++ b/tokenize.c
@@ -283,7 +283,7 @@ got_eof:
* Slow path (including the logics with line-splicing and EOF sanity
* checks) is in nextchar_slow().
*/
-static int nextchar(stream_t *stream)
+static inline int nextchar(stream_t *stream)
{
int offset = stream->offset;
@@ -615,12 +615,68 @@ unsigned char combinations[][3] = COMBINATION_STRINGS;
#define NR_COMBINATIONS (SPECIAL_ARG_SEPARATOR - SPECIAL_BASE)
+/* hash function for two-character punctuators - all give unique values */
+#define special_hash(c0, c1) (((c0*8+c1*2)+((c0*8+c1*2)>>5))&31)
+
+/*
+ * note that we won't get false positives - special_hash(0,0) is 0 and
+ * entry 0 is filled (by +=), so all the missing ones are OK.
+ */
+static unsigned char hash_results[32][2] = {
+#define RES(c0, c1) [special_hash(c0, c1)] = {c0, c1}
+ RES('+', '='), /* 00 */
+ RES('/', '='), /* 01 */
+ RES('^', '='), /* 05 */
+ RES('&', '&'), /* 07 */
+ RES('#', '#'), /* 08 */
+ RES('<', '<'), /* 0a */
+ RES('<', '='), /* 0c */
+ RES('!', '='), /* 0e */
+ RES('%', '='), /* 0f */
+ RES('-', '-'), /* 10 */
+ RES('-', '='), /* 11 */
+ RES('-', '>'), /* 13 */
+ RES('=', '='), /* 15 */
+ RES('&', '='), /* 17 */
+ RES('*', '='), /* 18 */
+ RES('.', '.'), /* 1a */
+ RES('+', '+'), /* 1b */
+ RES('|', '='), /* 1c */
+ RES('>', '='), /* 1d */
+ RES('|', '|'), /* 1e */
+ RES('>', '>') /* 1f */
+#undef RES
+};
+static int code[32] = {
+#define CODE(c0, c1, value) [special_hash(c0, c1)] = value
+ CODE('+', '=', SPECIAL_ADD_ASSIGN), /* 00 */
+ CODE('/', '=', SPECIAL_DIV_ASSIGN), /* 01 */
+ CODE('^', '=', SPECIAL_XOR_ASSIGN), /* 05 */
+ CODE('&', '&', SPECIAL_LOGICAL_AND), /* 07 */
+ CODE('#', '#', SPECIAL_HASHHASH), /* 08 */
+ CODE('<', '<', SPECIAL_LEFTSHIFT), /* 0a */
+ CODE('<', '=', SPECIAL_LTE), /* 0c */
+ CODE('!', '=', SPECIAL_NOTEQUAL), /* 0e */
+ CODE('%', '=', SPECIAL_MOD_ASSIGN), /* 0f */
+ CODE('-', '-', SPECIAL_DECREMENT), /* 10 */
+ CODE('-', '=', SPECIAL_SUB_ASSIGN), /* 11 */
+ CODE('-', '>', SPECIAL_DEREFERENCE), /* 13 */
+ CODE('=', '=', SPECIAL_EQUAL), /* 15 */
+ CODE('&', '=', SPECIAL_AND_ASSIGN), /* 17 */
+ CODE('*', '=', SPECIAL_MUL_ASSIGN), /* 18 */
+ CODE('.', '.', SPECIAL_DOTDOT), /* 1a */
+ CODE('+', '+', SPECIAL_INCREMENT), /* 1b */
+ CODE('|', '=', SPECIAL_OR_ASSIGN), /* 1c */
+ CODE('>', '=', SPECIAL_GTE), /* 1d */
+ CODE('|', '|', SPECIAL_LOGICAL_OR), /* 1e */
+ CODE('>', '>', SPECIAL_RIGHTSHIFT) /* 1f */
+#undef CODE
+};
+
static int get_one_special(int c, stream_t *stream)
{
struct token *token;
- unsigned char c1, c2, c3;
int next, value, i;
- unsigned char *comb;
next = nextchar(stream);
@@ -648,17 +704,15 @@ static int get_one_special(int c, stream_t *stream)
*/
value = c;
if (cclass[next + 1] & ValidSecond) {
- comb = combinations[0];
- c1 = c; c2 = next; c3 = 0;
- for (i = 0; i < NR_COMBINATIONS; i++) {
- if (comb[0] == c1 && comb[1] == c2 && comb[2] == c3) {
- value = i + SPECIAL_BASE;
+ i = special_hash(c, next);
+ if (hash_results[i][0] == c && hash_results[i][1] == next) {
+ value = code[i];
+ next = nextchar(stream);
+ if (value >= SPECIAL_LEFTSHIFT &&
+ next == "==."[value - SPECIAL_LEFTSHIFT]) {
+ value += 3;
next = nextchar(stream);
- if (c3)
- break;
- c3 = next;
}
- comb += 3;
}
}