aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
-rw-r--r-- Makefile 11
-rw-r--r-- lib.c 39
-rw-r--r-- parse.c 176
-rw-r--r-- parse.h 24
-rw-r--r-- test-lexing.c 92
-rw-r--r-- test-parsing.c 27
-rw-r--r-- token.h 6
-rw-r--r-- tokenize.c 113
8 files changed, 371 insertions, 117 deletions
diff --git a/Makefile b/Makefile
index 6599d016..4b22617b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,10 +1,17 @@
CFLAGS=-g -Wall
-test-lexing: test-lexing.o tokenize.o
- gcc -o $@ test-lexing.o tokenize.o
+all: test-lexing test-parsing
+test-lexing: test-lexing.o tokenize.o lib.o
+ gcc -o $@ test-lexing.o tokenize.o lib.o
+
+test-parsing: test-parsing.o parse.o tokenize.o lib.o
+ gcc -o $@ test-parsing.o parse.o tokenize.o lib.o
+
+# Header dependencies: each object is rebuilt when a header it includes changes.
+test-parsing.o: token.h parse.h
test-lexing.o: token.h
tokenize.o: token.h
+parse.o: token.h parse.h
+lib.o: token.h
clean:
rm -f *.o
diff --git a/lib.c b/lib.c
new file mode 100644
index 00000000..d0b9d661
--- /dev/null
+++ b/lib.c
@@ -0,0 +1,39 @@
+/*
+ * Helper routines
+ */
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "token.h"
+
+/*
+ * Print a warning to stderr as "warning: <stream name>:<line>: <message>".
+ *
+ * token: token whose stream and line locate the problem. May be NULL (for
+ *        example when a parser ran off the end of the token list); the
+ *        message is then printed without a location.
+ * fmt:   printf-style format string, followed by its arguments.
+ *
+ * The formatted message is truncated to fit the 512-byte static buffer.
+ */
+void warn(struct token *token, const char * fmt, ...)
+{
+	static char buffer[512];
+	struct stream *stream;
+	va_list args;
+
+	va_start(args, fmt);
+	/* Bounded formatting, as in die(): vsprintf() could overrun the buffer. */
+	vsnprintf(buffer, sizeof(buffer), fmt, args);
+	va_end(args);
+
+	if (!token) {
+		/* No token means no stream/line to report. */
+		fprintf(stderr, "warning: %s\n", buffer);
+		return;
+	}
+
+	stream = input_streams + token->stream;
+	fprintf(stderr, "warning: %s:%d: %s\n",
+		stream->name, token->line,
+		buffer);
+}
+
+
+/*
+ * Report a fatal error and terminate the program.
+ *
+ * Formats the printf-style message (truncated to the 512-byte static
+ * buffer), writes it to stderr followed by a newline, and exits with
+ * status 1. Does not return.
+ */
+void die(const char *fmt, ...)
+{
+	static char message[512];
+	va_list ap;
+
+	va_start(ap, fmt);
+	vsnprintf(message, sizeof(message), fmt, ap);
+	va_end(ap);
+
+	fputs(message, stderr);
+	fputc('\n', stderr);
+	exit(1);
+}
+
+
diff --git a/parse.c b/parse.c
new file mode 100644
index 00000000..f8fdc5be
--- /dev/null
+++ b/parse.c
@@ -0,0 +1,176 @@
+/*
+ * Stupid C parser, version 1e-6.
+ *
+ * Let's see how hard this is to do.
+ */
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "token.h"
+#include "parse.h"
+
+/*
+ * Print an expression tree to stdout.
+ *
+ * Identifiers print as their token text, binary nodes as
+ * "<left op right>", unary nodes as "(operand)", and any other node
+ * type as "WTF". A NULL expression prints nothing.
+ */
+void show_expression(struct expression *expr)
+{
+	if (!expr)
+		return;
+
+	if (expr->type == EXPR_IDENT) {
+		printf("%s", show_token(expr->token));
+		return;
+	}
+
+	if (expr->type == EXPR_BINOP) {
+		putchar('<');
+		show_expression(expr->left);
+		/* Fetch the operator text only after the left side has been printed. */
+		printf(" %s ", show_special(expr->op));
+		show_expression(expr->right);
+		putchar('>');
+		return;
+	}
+
+	if (expr->type == EXPR_UNARY) {
+		putchar('(');
+		show_expression(expr->unop);
+		putchar(')');
+		return;
+	}
+
+	fputs("WTF", stdout);
+}
+
+/*
+ * Allocate a zero-filled expression node of the given type that refers
+ * to 'token'. Calls die() if the allocation fails.
+ */
+static struct expression *alloc_expression(struct token *token, int type)
+{
+	struct expression *node = calloc(1, sizeof(*node));
+
+	if (node == NULL)
+		die("Unable to allocate expression");
+	node->token = token;
+	node->type = type;
+	return node;
+}
+
+/*
+ * Parse the innermost expression level: an identifier or a
+ * parenthesized expression.
+ *
+ * token: first token to parse; may be NULL (end of input).
+ * tree:  receives the parsed node, or NULL if nothing could be parsed.
+ *
+ * Returns the first token that was not consumed. On a syntax error a
+ * warning is printed and the offending token is returned unconsumed.
+ * When the closing ')' is missing, *tree still receives the EXPR_UNARY
+ * node built so far.
+ */
+struct token *cast_expression(struct token *token, struct expression **tree)
+{
+	struct expression *expr = NULL;
+
+	if (token) {
+		switch (token->value.type) {
+		case TOKEN_IDENT:
+			expr = alloc_expression(token, EXPR_IDENT);
+			token = token->next;
+			break;
+		case TOKEN_SPECIAL:
+			if (token->value.special == '(') {
+				expr = alloc_expression(token, EXPR_UNARY);
+				expr->op = '(';
+				token = parse_expression(token->next, &expr->unop);
+				if (!token || token->value.type != TOKEN_SPECIAL || token->value.special != ')') {
+					/*
+					 * token is NULL when the input ended before the
+					 * ')': report at the opening '(' instead of
+					 * handing warn() a NULL token.
+					 */
+					warn(token ? token : expr->token, "Expected ')'");
+				} else {
+					token = token->next;
+				}
+				break;
+			}
+			/* fallthrough: any other special token cannot start an expression */
+		default:
+			warn(token, "Syntax error");
+		}
+	}
+	*tree = expr;
+	return token;
+}
+
+/* Generic left-to-right binop parsing */
+/*
+ * Generic left-to-right (left-associative) binary operator parsing.
+ *
+ * token: first token of the expression.
+ * tree:  receives the resulting expression tree (NULL if the first
+ *        operand could not be parsed).
+ * inner: parser for the operands, i.e. the next-higher precedence level.
+ * ...:   the operator codes (as int) accepted at this level, terminated
+ *        by 0.
+ *
+ * Parses "operand { op operand }" and builds EXPR_BINOP nodes that nest
+ * to the left. Returns the first token that was not consumed.
+ */
+struct token *lr_binop_expression(struct token *token, struct expression **tree,
+	struct token *(*inner)(struct token *, struct expression **), ...)
+{
+	struct expression *left = NULL;
+	struct token * next = inner(token, &left);
+
+	if (left) {
+		while (next && next->value.type == TOKEN_SPECIAL) {
+			struct expression *top, *right = NULL;
+			struct token *optoken = next;
+			int op = optoken->value.special;
+			int nextop;
+			va_list args;
+
+			/*
+			 * Scan the 0-terminated operator list. Every va_start()
+			 * is paired with va_end() before leaving the loop body.
+			 */
+			va_start(args, inner);
+			do {
+				nextop = va_arg(args, int);
+			} while (nextop && nextop != op);
+			va_end(args);
+			if (!nextop)
+				break;	/* not an operator of this level */
+
+			next = inner(optoken->next, &right);
+			if (!right) {
+				warn(token, "Syntax error");
+				break;
+			}
+			/* Allocate only once both operands exist, so nothing leaks on error. */
+			top = alloc_expression(optoken, EXPR_BINOP);
+			top->op = op;
+			top->left = left;
+			top->right = right;
+			left = top;
+		}
+	}
+	*tree = left;
+	return next;
+}
+
+/* Cast expressions chained left-to-right by '*', '/' or '%'. */
+struct token *multiplicative_expression(struct token *token, struct expression **tree)
+{
+ return lr_binop_expression(token, tree, cast_expression, '*', '/', '%', 0);
+}
+
+/* Multiplicative expressions chained left-to-right by '+' or '-'. */
+struct token *additive_expression(struct token *token, struct expression **tree)
+{
+ return lr_binop_expression(token, tree, multiplicative_expression, '+', '-', 0);
+}
+
+/* Additive expressions chained left-to-right by SPECIAL_LEFTSHIFT or SPECIAL_RIGHTSHIFT. */
+struct token *shift_expression(struct token *token, struct expression **tree)
+{
+ return lr_binop_expression(token, tree, additive_expression, SPECIAL_LEFTSHIFT, SPECIAL_RIGHTSHIFT, 0);
+}
+
+/* Shift expressions chained left-to-right by '<', '>', SPECIAL_LTE or SPECIAL_GTE. */
+struct token *relational_expression(struct token *token, struct expression **tree)
+{
+ return lr_binop_expression(token, tree, shift_expression, '<', '>', SPECIAL_LTE, SPECIAL_GTE, 0);
+}
+
+/* Relational expressions chained left-to-right by SPECIAL_EQUAL or SPECIAL_NOTEQUAL. */
+struct token *equality_expression(struct token *token, struct expression **tree)
+{
+ return lr_binop_expression(token, tree, relational_expression, SPECIAL_EQUAL, SPECIAL_NOTEQUAL, 0);
+}
+
+/* Equality expressions chained left-to-right by '&'. */
+struct token *bitwise_and_expression(struct token *token, struct expression **tree)
+{
+ return lr_binop_expression(token, tree, equality_expression, '&', 0);
+}
+
+/* Bitwise-and expressions chained left-to-right by '^'. */
+struct token *bitwise_xor_expression(struct token *token, struct expression **tree)
+{
+ return lr_binop_expression(token, tree, bitwise_and_expression, '^', 0);
+}
+
+/* Bitwise-xor expressions chained left-to-right by '|'. */
+struct token *bitwise_or_expression(struct token *token, struct expression **tree)
+{
+ return lr_binop_expression(token, tree, bitwise_xor_expression, '|', 0);
+}
+
+/* Bitwise-or expressions chained left-to-right by SPECIAL_LOGICAL_AND. */
+struct token *logical_and_expression(struct token *token, struct expression **tree)
+{
+ return lr_binop_expression(token, tree, bitwise_or_expression, SPECIAL_LOGICAL_AND, 0);
+}
+
+/* Logical-and expressions chained left-to-right by SPECIAL_LOGICAL_OR. */
+struct token *logical_or_expression(struct token *token, struct expression **tree)
+{
+ return lr_binop_expression(token, tree, logical_and_expression, SPECIAL_LOGICAL_OR, 0);
+}
+
+/*
+ * Parser entry point: parse one expression starting at 'token' by
+ * delegating to logical_or_expression(). The resulting tree is stored
+ * in *tree and the first unconsumed token is returned.
+ */
+struct token *parse_expression(struct token *token, struct expression **tree)
+{
+ return logical_or_expression(token,tree);
+}
diff --git a/parse.h b/parse.h
new file mode 100644
index 00000000..0a2e2ff8
--- /dev/null
+++ b/parse.h
@@ -0,0 +1,24 @@
+#ifndef PARSE_H
+#define PARSE_H
+
+/* Kind of a struct expression node; selects which of its members are in use. */
+enum expression_type {
+ /* Single operand, stored in 'unop' (the parser uses it for "( expr )"). */
+ EXPR_UNARY,
+ /* Two operands in 'left' and 'right', operator code in 'op'. */
+ EXPR_BINOP,
+ /* Leaf node: the identifier is the node's 'token'. */
+ EXPR_IDENT,
+};
+
+/*
+ * One node of an expression tree.
+ *
+ * type:  an enum expression_type value; selects the active union member.
+ * op:    operator code of the node (set for unary and binary nodes).
+ * token: the token the node was created from.
+ */
+struct expression {
+	int type, op;
+	struct token *token;
+	union {
+		/* EXPR_UNARY: the single operand. */
+		struct expression *unop;
+		/*
+		 * EXPR_BINOP: both operands. The struct must stay untagged:
+		 * only an untagged struct is a C11 anonymous member whose
+		 * fields are reachable as expr->left / expr->right.
+		 */
+		struct {
+			struct expression *left, *right;
+		};
+	};
+};
+
+extern struct token *parse_expression(struct token *, struct expression **);
+extern void show_expression(struct expression *);
+
+#endif /* PARSE_H */
diff --git a/test-lexing.c b/test-lexing.c
index 419a3bcc..cd6f6274 100644
--- a/test-lexing.c
+++ b/test-lexing.c
@@ -7,99 +7,9 @@
#include <fcntl.h>
#include "token.h"
-void die(const char *fmt, ...)
-{
- va_list args;
- static char buffer[512];
-
- va_start(args, fmt);
- vsnprintf(buffer, sizeof(buffer), fmt, args);
- va_end(args);
-
- fprintf(stderr, "%s\n", buffer);
- exit(1);
-}
-
-static char *show_value(struct value *value)
-{
- static char buffer[256];
-
- switch (value->type) {
- case TOKEN_ERROR:
- return "syntax error";
-
- case TOKEN_IDENT: {
- struct ident *ident = value->ident;
- sprintf(buffer, "%.*s", ident->len, ident->name);
- return buffer;
- }
-
- case TOKEN_STRING: {
- char *ptr;
- int i;
- struct string *string = value->string;
-
- ptr = buffer;
- *ptr++ = '"';
- for (i = 0; i < string->length; i++) {
- unsigned char c = string->data[i];
- if (isprint(c) && c != '"') {
- *ptr++ = c;
- continue;
- }
- *ptr++ = '\\';
- switch (c) {
- case '\n':
- *ptr++ = 'n';
- continue;
- case '\t':
- *ptr++ = 't';
- continue;
- case '"':
- *ptr++ = '"';
- continue;
- }
- if (!isdigit(string->data[i+1])) {
- ptr += sprintf(ptr, "%o", c);
- continue;
- }
-
- ptr += sprintf(ptr, "%03o", c);
- }
- *ptr++ = '"';
- *ptr = '\0';
- return buffer;
- }
-
- case TOKEN_INTEGER: {
- char *ptr;
- ptr = buffer + sprintf(buffer, "%llu", value->intval);
- return buffer;
- }
-
- case TOKEN_FP: {
- sprintf(buffer, "%f", value->fpval);
- return buffer;
- }
-
- case TOKEN_SPECIAL: {
- int val = value->special;
- static const char *combinations[] = COMBINATION_STRINGS;
- buffer[0] = val;
- buffer[1] = 0;
- if (val >= SPECIAL_BASE)
- strcpy(buffer, combinations[val - SPECIAL_BASE]);
- return buffer;
- }
-
- default:
- return "WTF???";
- }
-}
-
void callback(struct token *token)
{
- printf("%s ", show_value(&token->value));
+ printf("%s ", show_token(token));
}
int main(int argc, char **argv)
diff --git a/test-parsing.c b/test-parsing.c
new file mode 100644
index 00000000..6a16403d
--- /dev/null
+++ b/test-parsing.c
@@ -0,0 +1,27 @@
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "token.h"
+#include "parse.h"
+
+/*
+ * Parser test driver: tokenize the file named by argv[1], parse one
+ * expression from it, warn about any tokens left over, and print the
+ * resulting expression tree followed by a newline.
+ */
+int main(int argc, char **argv)
+{
+	struct token *token;
+	struct expression *expr = NULL;
+	int fd;
+
+	/* Without this check a missing argument would hand NULL to open(). */
+	if (argc < 2)
+		die("Usage: %s <file>", argc > 0 ? argv[0] : "test-parsing");
+
+	fd = open(argv[1], O_RDONLY);
+	if (fd < 0)
+		die("No such file: %s", argv[1]);
+	token = tokenize(argv[1], fd);
+	token = parse_expression(token, &expr);
+	if (token)
+		warn(token, "Extra data");
+	show_expression(expr);
+	printf("\n");
+	return 0;
+}
diff --git a/token.h b/token.h
index f80e9a18..f9ca80e2 100644
--- a/token.h
+++ b/token.h
@@ -45,7 +45,7 @@ enum token_type {
"..", "...", \
"<=", "<<", "<<=", \
">=", ">>", ">>=", \
- "==", \
+ "==", "!=", \
"&&", "&=", \
"||", "|=", \
"^=", \
@@ -72,6 +72,7 @@ enum special_token {
SPECIAL_RIGHTSHIFT,
SPECIAL_SHR_ASSIGN,
SPECIAL_EQUAL,
+ SPECIAL_NOTEQUAL,
SPECIAL_LOGICAL_AND,
SPECIAL_AND_ASSIGN,
SPECIAL_LOGICAL_OR,
@@ -102,7 +103,10 @@ struct token {
struct token *next;
};
+extern const char *show_special(int op);
+extern const char *show_token(const struct token *token);
extern struct token * tokenize(const char *, int);
extern void die(const char *, ...);
+extern void warn(struct token *, const char *, ...);
#endif
diff --git a/tokenize.c b/tokenize.c
index 02768b6d..d7670a20 100644
--- a/tokenize.c
+++ b/tokenize.c
@@ -21,6 +21,90 @@ int input_stream_nr = 0;
struct stream *input_streams;
static int input_streams_allocated;
+/*
+ * Return the text of a special (punctuation/operator) token.
+ *
+ * Values below SPECIAL_BASE are single characters and are returned in
+ * a static buffer that the next call overwrites. Values from
+ * SPECIAL_BASE up index the COMBINATION_STRINGS table; the table entry
+ * itself is returned, so no fixed-size copy can overflow. A value past
+ * the end of the table yields "WTF???" instead of an out-of-bounds read.
+ */
+const char *show_special(int val)
+{
+	static const char *combinations[] = COMBINATION_STRINGS;
+	static char buffer[4];
+
+	if (val >= SPECIAL_BASE) {
+		unsigned int index = val - SPECIAL_BASE;
+
+		if (index >= sizeof(combinations) / sizeof(combinations[0]))
+			return "WTF???";
+		return combinations[index];
+	}
+
+	buffer[0] = val;
+	buffer[1] = 0;
+	return buffer;
+}
+
+
+/*
+ * Return a printable representation of a token.
+ *
+ * Identifiers, integers and floating point values are formatted into a
+ * 256-byte static buffer; string tokens are rendered in double quotes
+ * with non-printable bytes and '"' escaped. Output that does not fit
+ * is truncated rather than written past the buffer. Special tokens are
+ * delegated to show_special(), error tokens give "syntax error" and
+ * unknown types give "WTF???".
+ *
+ * The result may point into static storage that later calls overwrite.
+ */
+const char *show_token(const struct token *token)
+{
+	static char buffer[256];
+	const struct value *value = &token->value;
+
+	switch (value->type) {
+	case TOKEN_ERROR:
+		return "syntax error";
+
+	case TOKEN_IDENT: {
+		struct ident *ident = value->ident;
+		snprintf(buffer, sizeof(buffer), "%.*s", ident->len, ident->name);
+		return buffer;
+	}
+
+	case TOKEN_STRING: {
+		struct string *string = value->string;
+		/* Keep two bytes back for the closing quote and the NUL. */
+		char *limit = buffer + sizeof(buffer) - 2;
+		char *ptr = buffer;
+		int i;
+
+		*ptr++ = '"';
+		for (i = 0; i < string->length; i++) {
+			unsigned char c = string->data[i];
+			char escaped[5];	/* longest form: backslash + 3 octal digits + NUL */
+			int len;
+
+			if (isprint(c) && c != '"') {
+				escaped[0] = c;
+				len = 1;
+			} else if (c == '\n' || c == '\t' || c == '"') {
+				escaped[0] = '\\';
+				escaped[1] = c == '\n' ? 'n' : c == '\t' ? 't' : '"';
+				len = 2;
+			} else if (i + 1 < string->length &&
+				   isdigit((unsigned char)string->data[i + 1])) {
+				/* Fixed width, so the digit that follows is not read as part of the escape. */
+				len = snprintf(escaped, sizeof(escaped), "\\%03o", c);
+			} else {
+				len = snprintf(escaped, sizeof(escaped), "\\%o", c);
+			}
+
+			/* Stop before an escape that would not fit completely. */
+			if (len > limit - ptr)
+				break;
+			memcpy(ptr, escaped, len);
+			ptr += len;
+		}
+		*ptr++ = '"';
+		*ptr = '\0';
+		return buffer;
+	}
+
+	case TOKEN_INTEGER:
+		snprintf(buffer, sizeof(buffer), "%llu", value->intval);
+		return buffer;
+
+	case TOKEN_FP:
+		/* "%f" of a large value can need more than 256 characters. */
+		snprintf(buffer, sizeof(buffer), "%f", value->fpval);
+		return buffer;
+
+	case TOKEN_SPECIAL:
+		return show_special(value->special);
+
+	default:
+		return "WTF???";
+	}
+}
+
static int init_stream(const char *name)
{
int stream = input_stream_nr;
@@ -69,23 +153,6 @@ static int nextchar(action_t *action)
return c;
}
-static void warn(action_t *action, const char *fmt, ...)
-{
- static char buffer[512];
- struct stream *stream;
- struct token *token = action->token;
-
- va_list args;
- va_start(args, fmt);
- vsprintf(buffer, fmt, args);
- va_end(args);
-
- stream = input_streams + token->stream;
- fprintf(stderr, "warning: %s:%d: %s\n",
- stream->name, token->line,
- buffer);
-}
-
static void add_token(action_t *action)
{
struct token *token = action->token;
@@ -204,7 +271,7 @@ static int escapechar(int first, int type, action_t *action, int *valp)
value = first;
if (first == '\n')
- warn(action, "Newline in string or character constant");
+ warn(action->token, "Newline in string or character constant");
if (first == '\\' && next != EOF) {
value = next;
@@ -246,7 +313,7 @@ static int escapechar(int first, int type, action_t *action, int *valp)
}
/* Fallthrough */
default:
- warn(action, "Unknown escape '%c'", value);
+ warn(action->token, "Unknown escape '%c'", value);
}
}
/* Mark it as escaped */
@@ -263,7 +330,7 @@ static int get_char_token(int next, action_t *action)
next = escapechar(next, '\'', action, &value);
if (value == '\'' || next != '\'') {
- warn(action, "Bad character constant");
+ warn(action->token, "Bad character constant");
drop_token(action);
return next;
}
@@ -289,7 +356,7 @@ static int get_string_token(int next, action_t *action)
if (val == '"')
break;
if (next == EOF) {
- warn(action, "Enf of file in middle of string");
+ warn(action->token, "Enf of file in middle of string");
return next;
}
if (len < sizeof(buffer)) {
@@ -300,7 +367,7 @@ static int get_string_token(int next, action_t *action)
}
if (len > 256)
- warn(action, "String too long");
+ warn(action->token, "String too long");
string = malloc(sizeof(int)+len);
memcpy(string->data, buffer, len);
@@ -336,7 +403,7 @@ static int drop_stream_comment(action_t *action)
for (;;) {
int curr = next;
if (curr == EOF) {
- warn(action, "End of file in the middle of a comment");
+ warn(action->token, "End of file in the middle of a comment");
return curr;
}
next = nextchar(action);