diff options
| -rw-r--r-- | Makefile | 5 | ||||
| -rw-r--r-- | parse.c | 75 | ||||
| -rw-r--r-- | parse.h | 5 | ||||
| -rw-r--r-- | symbol.c | 33 | ||||
| -rw-r--r-- | symbol.h | 37 | ||||
| -rw-r--r-- | test-lexing.c | 2 | ||||
| -rw-r--r-- | test-parsing.c | 2 | ||||
| -rw-r--r-- | token.h | 15 | ||||
| -rw-r--r-- | tokenize.c | 81 |
9 files changed, 197 insertions, 58 deletions
@@ -7,13 +7,14 @@ all: $(PROGRAMS) test-lexing: test-lexing.o tokenize.o lib.o gcc -o $@ test-lexing.o tokenize.o lib.o -test-parsing: test-parsing.o parse.o tokenize.o lib.o - gcc -o $@ test-parsing.o parse.o tokenize.o lib.o +test-parsing: test-parsing.o parse.o tokenize.o symbol.o lib.o + gcc -o $@ test-parsing.o parse.o tokenize.o symbol.o lib.o test-parsing.o: token.h parse.h test-lexing.o: token.h tokenize.o: token.h parse.o: token.h parse.h +symbol.o: symbol.h token.h parse.h clean: rm -f *.[oasi] $(PROGRAMS) @@ -13,45 +13,46 @@ #include "token.h" #include "parse.h" +#include "symbol.h" void show_expression(struct expression *expr) { if (!expr) return; + printf("< "); switch (expr->type) { case EXPR_BINOP: - printf("< "); show_expression(expr->left); printf(" %s ", show_special(expr->op)); show_expression(expr->right); - printf(" >"); break; case EXPR_PREOP: - printf("( "); printf(" %s ", show_special(expr->op)); show_expression(expr->unop); - printf(" )"); break; case EXPR_POSTOP: - printf("( "); show_expression(expr->unop); printf(" %s ", show_special(expr->op)); - printf(" )"); break; case EXPR_PRIMARY: printf("%s", show_token(expr->token)); break; case EXPR_DEREF: - printf("< "); show_expression(expr->deref); printf("%s", show_special(expr->op)); printf("%s", show_token(expr->member)); - printf(" >"); + break; + case EXPR_CAST: + printf("("); + show_expression(expr->cast_type); + printf(")"); + show_expression(expr->cast_expression); break; default: printf("WTF"); } + printf(" >"); } static struct expression *alloc_expression(struct token *token, int type) @@ -66,11 +67,14 @@ static struct expression *alloc_expression(struct token *token, int type) return expr; } +static int match_op(struct token *token, int op) +{ + return token && token->type == TOKEN_SPECIAL && token->special == op; +} + static struct token *expect(struct token *token, int op) { - if (!token || - token->value.type != TOKEN_SPECIAL || - token->value.special != op) { + if (!match_op(token, op)) { warn(token, "Expected %s", show_special(op)); return token; } @@ -83,7 +87,13 @@ static struct token *primary_expression(struct token *token, struct expression * { struct expression *expr = NULL; - switch (token->value.type) { + if (!token) { + warn(token, "unexpected end of file"); + *tree = NULL; + return token; + } + + switch (token->type) { case TOKEN_IDENT: case TOKEN_INTEGER: case TOKEN_FP: @@ -93,7 +103,7 @@ static struct token *primary_expression(struct token *token, struct expression * break; case TOKEN_SPECIAL: - if (token->value.special == '(') { + if (token->special == '(') { expr = alloc_expression(token, EXPR_PREOP); expr->op = '('; token = parse_expression(token->next, &expr->unop); @@ -113,8 +123,8 @@ static struct token *postfix_expression(struct token *token, struct expression * struct expression *expr = NULL; token = primary_expression(token, &expr); - while (expr && token && token->value.type == TOKEN_SPECIAL) { - switch (token->value.special) { + while (expr && token && token->type == TOKEN_SPECIAL) { + switch (token->special) { case '[': { /* Array dereference */ struct expression *array_expr = alloc_expression(token, EXPR_BINOP); array_expr->op = '['; @@ -127,7 +137,7 @@ static struct token *postfix_expression(struct token *token, struct expression * case SPECIAL_INCREMENT: /* Post-increment */ case SPECIAL_DECREMENT: { /* Post-decrement */ struct expression *post = alloc_expression(token, EXPR_POSTOP); - post->op = token->value.special; + post->op = token->special; post->unop = expr; expr = post; token = token->next; @@ -136,10 +146,10 @@ static struct token *postfix_expression(struct token *token, struct expression * case '.': /* Structure member dereference */ case SPECIAL_DEREFERENCE: { /* Structure pointer member dereference */ struct expression *deref = alloc_expression(token, EXPR_DEREF); - deref->op = token->value.special; + deref->op = token->special; deref->deref = expr; token = token->next; - if (!token || token->value.type != TOKEN_IDENT) { + if (!token || token->type != TOKEN_IDENT) { warn(token, "Expected member name"); break; } @@ -173,8 +183,33 @@ static struct token *unary_expression(struct token *token, struct expression **t return postfix_expression(token, tree); } +/* This is bogus, but before I have real types. */ +static struct token *typename_expression(struct token *token, struct expression **tree) +{ + return parse_expression(token,tree); +} + +/* + * Ambiguity: a '(' can be either a cast-expression or + * a primary-expression depending on whether it is followed + * by a type or not. + */ static struct token *cast_expression(struct token *token, struct expression **tree) { + if (match_op(token, '(')) { + struct token *next = token->next; + if (next && next->type == TOKEN_IDENT) { + struct symbol *sym = next->ident->symbol; + if (sym && symbol_is_typename(sym)) { + struct expression *cast = alloc_expression(next, EXPR_CAST); + token = typename_expression(next, &cast->cast_type); + token = expect(token, ')'); + token = cast_expression(token, &cast->cast_expression); + *tree = cast; + return token; + } + } + } return unary_expression(token, tree); } @@ -186,9 +221,9 @@ static struct token *lr_binop_expression(struct token *token, struct expression struct token * next = inner(token, &left); if (left) { - while (next && next->value.type == TOKEN_SPECIAL) { + while (next && next->type == TOKEN_SPECIAL) { struct expression *top, *right = NULL; - int op = next->value.special; + int op = next->special; va_list args; va_start(args, inner); @@ -7,6 +7,7 @@ enum expression_type { EXPR_DEREF, EXPR_PREOP, EXPR_POSTOP, + EXPR_CAST, }; struct expression { @@ -21,6 +22,10 @@ struct expression { struct expression *deref; struct token *member; }; + struct cast_arg { + struct expression *cast_type; + struct expression *cast_expression; + }; }; }; diff --git a/symbol.c b/symbol.c new file mode 100644 index 00000000..ed09a167 --- /dev/null +++ b/symbol.c @@ -0,0 +1,33 @@ +#include <stdlib.h> +#include "token.h" +#include "symbol.h" + +struct symbol *alloc_symbol(struct token *token, int type) +{ + struct symbol *sym = malloc(sizeof(struct symbol)); + struct ident *ident; + + if (token->type != TOKEN_IDENT) + die("Internal error: trying to make a symbol out of a non-identifier"); + ident = token->ident; + if (!sym) + die("out of memory for symbol information"); + sym->token = token; + sym->next = ident->symbol; + sym->type = type; + ident->symbol = sym; + return sym; +} + +struct symbol *create_symbol(int stream, const char *name, int type) +{ + return alloc_symbol(built_in_token(stream, name), type); +} + +void init_symbols(void) +{ + int stream = init_stream("builtin"); + struct symbol *sym; + + sym = create_symbol(stream, "int", SYM_TYPEDEF); +} diff --git a/symbol.h b/symbol.h new file mode 100644 index 00000000..cb3de36e --- /dev/null +++ b/symbol.h @@ -0,0 +1,37 @@ +#ifndef SEMANTIC_H +#define SEMANTIC_H + +#include "token.h" + +/* + * An identifier with semantic meaning is a "symbol". + * + * There's a 1:n relationship: each symbol is always + * associated with one identifier, while each identifier + * can have one or more semantic meanings due to C scope + * rules. + * + * The progression is symbol -> token -> identifier. The + * token contains the information on where the symbol was + * declared. + */ +struct symbol { + struct token *token; /* Where this symbol was declared */ + struct symbol *next; /* Next semantic symbol that shares this identifier */ + int type; +}; + +enum symbol_types { + SYM_NONE = 0, /* regular variable */ + SYM_MEMBER, /* structure member */ + SYM_TYPEDEF, /* typedef */ + SYM_SPECIFIER, /* specifier */ + SYM_QUALIFIER, /* type qualifier */ +}; + +#define symbol_is_typename(sym) ((sym)->type >= SYM_TYPEDEF) + +void init_symbols(void); + +#endif /* SEMANTIC_H */ + diff --git a/test-lexing.c b/test-lexing.c index 9db756ab..007f404c 100644 --- a/test-lexing.c +++ b/test-lexing.c @@ -5,6 +5,7 @@ #include <ctype.h> #include <unistd.h> #include <fcntl.h> + #include "token.h" void callback(struct token *token) @@ -20,6 +21,7 @@ int main(int argc, char **argv) if (fd < 0) die("No such file: %s", argv[1]); + token = tokenize(argv[1], fd); line = token->line; while (token) { diff --git a/test-parsing.c b/test-parsing.c index 6a16403d..4a994851 100644 --- a/test-parsing.c +++ b/test-parsing.c @@ -8,6 +8,7 @@ #include "token.h" #include "parse.h" +#include "symbol.h" int main(int argc, char **argv) { @@ -17,6 +18,7 @@ int main(int argc, char **argv) if (fd < 0) die("No such file: %s", argv[1]); + init_symbols(); token = tokenize(argv[1], fd); token = parse_expression(token, &expr); if (token) @@ -85,7 +85,11 @@ struct string { char data[]; }; -struct value { +struct token { + unsigned int line; + unsigned int pos:16,stream:8,len:8; + struct token *next; + enum token_type type; union { double fpval; @@ -96,13 +100,8 @@ struct value { }; }; -struct token { - unsigned int line; - unsigned int pos:16,stream:8,len:8; - struct value value; - struct token *next; -}; - +extern int init_stream(const char *); +extern struct token *built_in_token(int, const char *); extern const char *show_special(int op); extern const char *show_token(const struct token *token); extern struct token * tokenize(const char *, int); @@ -37,14 +37,13 @@ const char *show_special(int val) const char *show_token(const struct token *token) { static char buffer[256]; - const struct value *value = &token->value; - switch (value->type) { + switch (token->type) { case TOKEN_ERROR: return "syntax error"; case TOKEN_IDENT: { - struct ident *ident = value->ident; + struct ident *ident = token->ident; sprintf(buffer, "%.*s", ident->len, ident->name); return buffer; } @@ -52,7 +51,7 @@ const char *show_token(const struct token *token) case TOKEN_STRING: { char *ptr; int i; - struct string *string = value->string; + struct string *string = token->string; ptr = buffer; *ptr++ = '"'; @@ -88,24 +87,24 @@ const char *show_token(const struct token *token) case TOKEN_INTEGER: { char *ptr; - ptr = buffer + sprintf(buffer, "%llu", value->intval); + ptr = buffer + sprintf(buffer, "%llu", token->intval); return buffer; } case TOKEN_FP: { - sprintf(buffer, "%f", value->fpval); + sprintf(buffer, "%f", token->fpval); return buffer; } case TOKEN_SPECIAL: - return show_special(value->special); + return show_special(token->special); default: return "WTF???"; } } -static int init_stream(const char *name) +int init_stream(const char *name) { int stream = input_stream_nr; @@ -121,6 +120,19 @@ static int init_stream(const char *name) return stream; } +struct token * alloc_token(int stream, int line, int pos) +{ + struct token *token = malloc(sizeof(struct token)); + if (!token) + die("Out of memory for token"); + + memset(token, 0, sizeof(struct token)); + token->line = line; + token->pos = pos; + token->stream = stream; + return token; +} + #define BUFSIZE (4096) typedef struct { int fd, line, pos, offset, size; @@ -175,8 +187,8 @@ static int do_integer(unsigned long long value, int next, action_t *action) { struct token *token = action->token; - token->value.type = TOKEN_INTEGER; - token->value.intval = value; + token->type = TOKEN_INTEGER; + token->intval = value; add_token(action); return next; } @@ -336,8 +348,8 @@ static int get_char_token(int next, action_t *action) } token = action->token; - token->value.type = TOKEN_INTEGER; - token->value.intval = value & 0xff; + token->type = TOKEN_INTEGER; + token->intval = value & 0xff; add_token(action); return nextchar(action); @@ -375,8 +387,8 @@ static int get_string_token(int next, action_t *action) /* Pass it on.. */ token = action->token; - token->value.type = TOKEN_STRING; - token->value.string = string; + token->type = TOKEN_STRING; + token->string = string; add_token(action); return next; @@ -461,8 +473,8 @@ static int get_one_special(int c, action_t *action) /* Pass it on.. */ token = action->token; - token->value.type = TOKEN_SPECIAL; - token->value.special = value; + token->type = TOKEN_SPECIAL; + token->special = value; add_token(action); return next; } @@ -534,6 +546,28 @@ static struct ident *create_hashed_ident(const char *name, int len, unsigned lon #define ident_hash_add(oldhash,c) ((oldhash)*11 + (c)) #define ident_hash_end(hash) (hash) +struct token *built_in_token(int stream, const char *name) +{ + int len = 1; + unsigned long hash; + struct token *token; + const unsigned char *p = (const unsigned char *)name; + + hash = ident_hash_init(*p++); + for (;;) { + unsigned int i = *p++; + if (!i) + break; + hash = ident_hash_add(hash, i); + len++; + } + hash = ident_hash_end(hash); + token = alloc_token(stream, 0, 0); + token->type = TOKEN_IDENT; + token->ident = create_hashed_ident(name, len, hash); + return token; +} + static int get_one_identifier(int c, action_t *action) { struct token *token; @@ -567,8 +601,8 @@ static int get_one_identifier(int c, action_t *action) /* Pass it on.. */ token = action->token; - token->value.type = TOKEN_IDENT; - token->value.ident = ident; + token->type = TOKEN_IDENT; + token->ident = ident; add_token(action); return next; } @@ -606,16 +640,7 @@ struct token * tokenize(const char *name, int fd) c = nextchar(&action); while (c != EOF) { if (!isspace(c)) { - struct token *token = malloc(sizeof(struct token)); - if (!token) - die("Out of memory for token"); - - memset(token, 0, sizeof(struct token)); - token->line = action.line; - token->pos = action.pos; - token->stream = stream; - - action.token = token; + action.token = alloc_token(stream, action.line, action.pos); c = get_one_token(c, &action); continue; |
