diff options
| -rw-r--r-- | Makefile | 11 | ||||
| -rw-r--r-- | lib.c | 39 | ||||
| -rw-r--r-- | parse.c | 176 | ||||
| -rw-r--r-- | parse.h | 24 | ||||
| -rw-r--r-- | test-lexing.c | 92 | ||||
| -rw-r--r-- | test-parsing.c | 27 | ||||
| -rw-r--r-- | token.h | 6 | ||||
| -rw-r--r-- | tokenize.c | 113 |
8 files changed, 371 insertions, 117 deletions
@@ -1,10 +1,17 @@ CFLAGS=-g -Wall -test-lexing: test-lexing.o tokenize.o - gcc -o $@ test-lexing.o tokenize.o +all: test-lexing test-parsing +test-lexing: test-lexing.o tokenize.o lib.o + gcc -o $@ test-lexing.o tokenize.o lib.o + +test-parsing: test-parsing.o parse.o tokenize.o lib.o + gcc -o $@ test-parsing.o parse.o tokenize.o lib.o + +test-parsing: token.h test-lexing.o: token.h tokenize.o: token.h +parse.o: token.h clean: rm -f *.o @@ -0,0 +1,39 @@ +/* + * Helper routines + */ +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include "token.h" + +void warn(struct token *token, const char * fmt, ...) +{ + static char buffer[512]; + struct stream *stream; + + va_list args; + va_start(args, fmt); + vsprintf(buffer, fmt, args); + va_end(args); + + stream = input_streams + token->stream; + fprintf(stderr, "warning: %s:%d: %s\n", + stream->name, token->line, + buffer); +} + + +void die(const char *fmt, ...) +{ + va_list args; + static char buffer[512]; + + va_start(args, fmt); + vsnprintf(buffer, sizeof(buffer), fmt, args); + va_end(args); + + fprintf(stderr, "%s\n", buffer); + exit(1); +} + + diff --git a/parse.c b/parse.c new file mode 100644 index 00000000..f8fdc5be --- /dev/null +++ b/parse.c @@ -0,0 +1,176 @@ +/* + * Stupid C parser, version 1e-6. + * + * Let's see how hard this is to do. + */ +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <unistd.h> +#include <fcntl.h> + +#include "token.h" +#include "parse.h" + +void show_expression(struct expression *expr) +{ + if (!expr) + return; + + switch (expr->type) { + case EXPR_IDENT: + printf("%s", show_token(expr->token)); + break; + case EXPR_BINOP: + printf("<"); + show_expression(expr->left); + printf(" %s ", show_special(expr->op)); + show_expression(expr->right); + printf(">"); + break; + case EXPR_UNARY: + printf("("); + show_expression(expr->unop); + printf(")"); + break; + default: + printf("WTF"); + } +} + +static struct expression *alloc_expression(struct token *token, int type) +{ + struct expression *expr = malloc(sizeof(struct expression)); + + if (!expr) + die("Unable to allocate expression"); + memset(expr, 0, sizeof(*expr)); + expr->type = type; + expr->token = token; + return expr; +} + +struct token *cast_expression(struct token *token, struct expression **tree) +{ + struct expression *expr = NULL; + + if (token) { + switch (token->value.type) { + case TOKEN_IDENT: + expr = alloc_expression(token, EXPR_IDENT); + token = token->next; + break; + case TOKEN_SPECIAL: + if (token->value.special == '(') { + expr = alloc_expression(token, EXPR_UNARY); + expr->op = '('; + token = parse_expression(token->next, &expr->unop); + if (!token || token->value.type != TOKEN_SPECIAL || token->value.special != ')') + warn(token, "Expected ')'"); + else + token = token->next; + break; + } + default: + warn(token, "Syntax error"); + } + } + *tree = expr; + return token; +} + +/* Generic left-to-right binop parsing */ +struct token *lr_binop_expression(struct token *token, struct expression **tree, + struct token *(*inner)(struct token *, struct expression **), ...) +{ + struct expression *left = NULL; + struct token * next = inner(token, &left); + + if (left) { + while (next && next->value.type == TOKEN_SPECIAL) { + struct expression *top, *right = NULL; + int op = next->value.special; + va_list args; + + va_start(args, inner); + for (;;) { + int nextop = va_arg(args, int); + if (!nextop) + goto out; + if (op == nextop) + break; + } + va_end(args); + top = alloc_expression(next, EXPR_BINOP); + next = inner(next->next, &right); + if (!right) { + warn(token, "Syntax error"); + break; + } + top->op = op; + top->left = left; + top->right = right; + left = top; + } + } +out: + *tree = left; + return next; +} + +struct token *multiplicative_expression(struct token *token, struct expression **tree) +{ + return lr_binop_expression(token, tree, cast_expression, '*', '/', '%', 0); +} + +struct token *additive_expression(struct token *token, struct expression **tree) +{ + return lr_binop_expression(token, tree, multiplicative_expression, '+', '-', 0); +} + +struct token *shift_expression(struct token *token, struct expression **tree) +{ + return lr_binop_expression(token, tree, additive_expression, SPECIAL_LEFTSHIFT, SPECIAL_RIGHTSHIFT, 0); +} + +struct token *relational_expression(struct token *token, struct expression **tree) +{ + return lr_binop_expression(token, tree, shift_expression, '<', '>', SPECIAL_LTE, SPECIAL_GTE, 0); +} + +struct token *equality_expression(struct token *token, struct expression **tree) +{ + return lr_binop_expression(token, tree, relational_expression, SPECIAL_EQUAL, SPECIAL_NOTEQUAL, 0); +} + +struct token *bitwise_and_expression(struct token *token, struct expression **tree) +{ + return lr_binop_expression(token, tree, equality_expression, '&', 0); +} + +struct token *bitwise_xor_expression(struct token *token, struct expression **tree) +{ + return lr_binop_expression(token, tree, bitwise_and_expression, '^', 0); +} + +struct token *bitwise_or_expression(struct token *token, struct expression **tree) +{ + return lr_binop_expression(token, tree, bitwise_xor_expression, '|', 0); +} + +struct token *logical_and_expression(struct token *token, struct expression **tree) +{ + return lr_binop_expression(token, tree, bitwise_or_expression, SPECIAL_LOGICAL_AND, 0); +} + +struct token *logical_or_expression(struct token *token, struct expression **tree) +{ + return lr_binop_expression(token, tree, logical_and_expression, SPECIAL_LOGICAL_OR, 0); +} + +struct token *parse_expression(struct token *token, struct expression **tree) +{ + return logical_or_expression(token,tree); +} diff --git a/parse.h b/parse.h new file mode 100644 index 00000000..0a2e2ff8 --- /dev/null +++ b/parse.h @@ -0,0 +1,24 @@ +#ifndef PARSE_H +#define PARSE_H + +enum expression_type { + EXPR_UNARY, + EXPR_BINOP, + EXPR_IDENT, +}; + +struct expression { + int type, op; + struct token *token; + union { + struct expression *unop; + struct binop_arg { + struct expression *left, *right; + }; + }; +}; + +extern struct token *parse_expression(struct token *, struct expression **); +extern void show_expression(struct expression *); + +#endif /* PARSE_H */ diff --git a/test-lexing.c b/test-lexing.c index 419a3bcc..cd6f6274 100644 --- a/test-lexing.c +++ b/test-lexing.c @@ -7,99 +7,9 @@ #include <fcntl.h> #include "token.h" -void die(const char *fmt, ...) -{ - va_list args; - static char buffer[512]; - - va_start(args, fmt); - vsnprintf(buffer, sizeof(buffer), fmt, args); - va_end(args); - - fprintf(stderr, "%s\n", buffer); - exit(1); -} - -static char *show_value(struct value *value) -{ - static char buffer[256]; - - switch (value->type) { - case TOKEN_ERROR: - return "syntax error"; - - case TOKEN_IDENT: { - struct ident *ident = value->ident; - sprintf(buffer, "%.*s", ident->len, ident->name); - return buffer; - } - - case TOKEN_STRING: { - char *ptr; - int i; - struct string *string = value->string; - - ptr = buffer; - *ptr++ = '"'; - for (i = 0; i < string->length; i++) { - unsigned char c = string->data[i]; - if (isprint(c) && c != '"') { - *ptr++ = c; - continue; - } - *ptr++ = '\\'; - switch (c) { - case '\n': - *ptr++ = 'n'; - continue; - case '\t': - *ptr++ = 't'; - continue; - case '"': - *ptr++ = '"'; - continue; - } - if (!isdigit(string->data[i+1])) { - ptr += sprintf(ptr, "%o", c); - continue; - } - - ptr += sprintf(ptr, "%03o", c); - } - *ptr++ = '"'; - *ptr = '\0'; - return buffer; - } - - case TOKEN_INTEGER: { - char *ptr; - ptr = buffer + sprintf(buffer, "%llu", value->intval); - return buffer; - } - - case TOKEN_FP: { - sprintf(buffer, "%f", value->fpval); - return buffer; - } - - case TOKEN_SPECIAL: { - int val = value->special; - static const char *combinations[] = COMBINATION_STRINGS; - buffer[0] = val; - buffer[1] = 0; - if (val >= SPECIAL_BASE) - strcpy(buffer, combinations[val - SPECIAL_BASE]); - return buffer; - } - - default: - return "WTF???"; - } -} - void callback(struct token *token) { - printf("%s ", show_value(&token->value)); + printf("%s ", show_token(token)); } int main(int argc, char **argv) diff --git a/test-parsing.c b/test-parsing.c new file mode 100644 index 00000000..6a16403d --- /dev/null +++ b/test-parsing.c @@ -0,0 +1,27 @@ +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <unistd.h> +#include <fcntl.h> + +#include "token.h" +#include "parse.h" + +int main(int argc, char **argv) +{ + int fd = open(argv[1], O_RDONLY); + struct token *token; + struct expression *expr; + + if (fd < 0) + die("No such file: %s", argv[1]); + token = tokenize(argv[1], fd); + token = parse_expression(token, &expr); + if (token) + warn(token, "Extra data"); + show_expression(expr); + printf("\n"); + return 0; +} @@ -45,7 +45,7 @@ enum token_type { "..", "...", \ "<=", "<<", "<<=", \ ">=", ">>", ">>=", \ - "==", \ + "==", "!=", \ "&&", "&=", \ "||", "|=", \ "^=", \ @@ -72,6 +72,7 @@ enum special_token { SPECIAL_RIGHTSHIFT, SPECIAL_SHR_ASSIGN, SPECIAL_EQUAL, + SPECIAL_NOTEQUAL, SPECIAL_LOGICAL_AND, SPECIAL_AND_ASSIGN, SPECIAL_LOGICAL_OR, @@ -102,7 +103,10 @@ struct token { struct token *next; }; +extern const char *show_special(int op); +extern const char *show_token(const struct token *token); extern struct token * tokenize(const char *, int); extern void die(const char *, ...); +extern void warn(struct token *, const char *, ...); #endif @@ -21,6 +21,90 @@ int input_stream_nr = 0; struct stream *input_streams; static int input_streams_allocated; +const char *show_special(int val) +{ + static const char *combinations[] = COMBINATION_STRINGS; + static char buffer[4]; + + buffer[0] = val; + buffer[1] = 0; + if (val >= SPECIAL_BASE) + strcpy(buffer, combinations[val - SPECIAL_BASE]); + return buffer; +} + + +const char *show_token(const struct token *token) +{ + static char buffer[256]; + const struct value *value = &token->value; + + switch (value->type) { + case TOKEN_ERROR: + return "syntax error"; + + case TOKEN_IDENT: { + struct ident *ident = value->ident; + sprintf(buffer, "%.*s", ident->len, ident->name); + return buffer; + } + + case TOKEN_STRING: { + char *ptr; + int i; + struct string *string = value->string; + + ptr = buffer; + *ptr++ = '"'; + for (i = 0; i < string->length; i++) { + unsigned char c = string->data[i]; + if (isprint(c) && c != '"') { + *ptr++ = c; + continue; + } + *ptr++ = '\\'; + switch (c) { + case '\n': + *ptr++ = 'n'; + continue; + case '\t': + *ptr++ = 't'; + continue; + case '"': + *ptr++ = '"'; + continue; + } + if (!isdigit(string->data[i+1])) { + ptr += sprintf(ptr, "%o", c); + continue; + } + + ptr += sprintf(ptr, "%03o", c); + } + *ptr++ = '"'; + *ptr = '\0'; + return buffer; + } + + case TOKEN_INTEGER: { + char *ptr; + ptr = buffer + sprintf(buffer, "%llu", value->intval); + return buffer; + } + + case TOKEN_FP: { + sprintf(buffer, "%f", value->fpval); + return buffer; + } + + case TOKEN_SPECIAL: + return show_special(value->special); + + default: + return "WTF???"; + } +} + static int init_stream(const char *name) { int stream = input_stream_nr; @@ -69,23 +153,6 @@ static int nextchar(action_t *action) return c; } -static void warn(action_t *action, const char *fmt, ...) -{ - static char buffer[512]; - struct stream *stream; - struct token *token = action->token; - - va_list args; - va_start(args, fmt); - vsprintf(buffer, fmt, args); - va_end(args); - - stream = input_streams + token->stream; - fprintf(stderr, "warning: %s:%d: %s\n", - stream->name, token->line, - buffer); -} - static void add_token(action_t *action) { struct token *token = action->token; @@ -204,7 +271,7 @@ static int escapechar(int first, int type, action_t *action, int *valp) value = first; if (first == '\n') - warn(action, "Newline in string or character constant"); + warn(action->token, "Newline in string or character constant"); if (first == '\\' && next != EOF) { value = next; @@ -246,7 +313,7 @@ static int escapechar(int first, int type, action_t *action, int *valp) } /* Fallthrough */ default: - warn(action, "Unknown escape '%c'", value); + warn(action->token, "Unknown escape '%c'", value); } } /* Mark it as escaped */ @@ -263,7 +330,7 @@ static int get_char_token(int next, action_t *action) next = escapechar(next, '\'', action, &value); if (value == '\'' || next != '\'') { - warn(action, "Bad character constant"); + warn(action->token, "Bad character constant"); drop_token(action); return next; } @@ -289,7 +356,7 @@ static int get_string_token(int next, action_t *action) if (val == '"') break; if (next == EOF) { - warn(action, "Enf of file in middle of string"); + warn(action->token, "Enf of file in middle of string"); return next; } if (len < sizeof(buffer)) { @@ -300,7 +367,7 @@ static int get_string_token(int next, action_t *action) } if (len > 256) - warn(action, "String too long"); + warn(action->token, "String too long"); string = malloc(sizeof(int)+len); memcpy(string->data, buffer, len); @@ -336,7 +403,7 @@ static int drop_stream_comment(action_t *action) for (;;) { int curr = next; if (curr == EOF) { - warn(action, "End of file in the middle of a comment"); + warn(action->token, "End of file in the middle of a comment"); return curr; } next = nextchar(action); |
