diff options
| author | Linus Torvalds <torvalds@penguin.transmeta.com> | 2003-03-14 17:08:12 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-07 20:59:16 -0700 |
| commit | 2d76ef523eb4606a60b7e95fd67b4c6d302a2b16 (patch) | |
| tree | 071211260972d1010dad6e64446b224dea96922c | |
| parent | ffa4ddda950185bf313654e903207a3fc2d5f261 (diff) | |
| download | sparse-dev-2d76ef523eb4606a60b7e95fd67b4c6d302a2b16.tar.gz | |
Add more type parsing: function and array declarators, function
parameter lists, etc. It can now parse declarations such as
const volatile int hello(const int (*argc)(void), const char *const* argv);
apparently correctly.
| -rw-r--r-- | parse.c | 162 | ||||
| -rw-r--r-- | parse.h | 2 | ||||
| -rw-r--r-- | symbol.c | 54 | ||||
| -rw-r--r-- | symbol.h | 23 | ||||
| -rw-r--r-- | test-parsing.c | 10 | ||||
| -rw-r--r-- | tokenize.c | 2 |
6 files changed, 165 insertions, 88 deletions
@@ -344,10 +344,8 @@ struct statement *alloc_statement(struct token * token, int type) return stmt; } -static struct token *declaration_specifiers(struct token *token, struct symbol **tree) +static struct token *declaration_specifiers(struct token *token, struct symbol *sym) { - struct symbol *sym = *tree; - for ( ; token; token = token->next) { struct ident *ident; struct symbol *s; @@ -390,33 +388,22 @@ static struct token *declaration_specifiers(struct token *token, struct symbol * sym->modifiers = old | mod | extra; } } - - if (!sym->base_type) - sym->base_type = &int_type; - *tree = sym; return token; } -static struct symbol *indirect(struct symbol *type) +static void force_default_type(struct symbol *sym) { - struct symbol *sym = alloc_symbol(SYM_PTR); - - sym->base_type = type; - return sym; + if (!sym->base_type) + sym->base_type = &int_type; } -static struct token *parameter_declaration(struct token *token, struct symbol **tree); -static struct token *parameter_type_list(struct token *token, struct symbol **tree) +static struct symbol *indirect(struct symbol *parent, int type) { - return parameter_declaration(token, tree); -} + struct symbol *sym = alloc_symbol(type); -static struct token *abstract_function_declarator(struct token *token, struct symbol **tree) -{ - struct symbol *sym = alloc_symbol(SYM_FN); - sym->base_type = *tree; - *tree = sym; - return parameter_type_list(token, &sym->next_type); + force_default_type(parent); + sym->base_type = parent; + return sym; } static int constant_value(struct expression *expr) @@ -427,46 +414,54 @@ static int constant_value(struct expression *expr) static struct token *abstract_array_declarator(struct token *token, struct symbol **tree) { struct expression *expr; - struct symbol *sym = alloc_symbol(SYM_ARRAY); - sym->base_type = *tree; - *tree = sym; + struct symbol *sym = *tree; token = parse_expression(token, &expr); sym->size = constant_value(expr); return token; } +static struct token 
*abstract_function_declarator(struct token *token, struct symbol **tree); + static struct token *direct_declarator(struct token *token, struct symbol **tree, struct token *(*declarator)(struct token *, struct symbol **, struct token **), struct token **ident) { - if (ident && token->type == TOKEN_IDENT) { + if (ident && token && token->type == TOKEN_IDENT) { *ident = token; - return token->next; + token = token->next; } - - if (token->type != TOKEN_SPECIAL) - return token; - /* - * This can be either a function or a grouping! - * A grouping must start with '*', '[' or '('.. - */ - if (token->special == '(') { - struct token *next = token->next; - if (next && next->type == TOKEN_SPECIAL) { - if (next->special == '*' || - next->special == '(' || - next->special == '[') { - token = declarator(next,tree, ident); - return expect(token, ')', "in nested declarator"); - } + for (;;) { + if (!token || token->type != TOKEN_SPECIAL) + return token; + + /* + * This can be either a function or a grouping! + * A grouping must start with '*', '[' or '('.. 
+ */ + if (token->special == '(') { + struct token *next = token->next; + if (!ident && next && next->type == TOKEN_SPECIAL) { + if (next->special == '*' || + next->special == '(' || + next->special == '[') { + token = declarator(next,tree, ident); + token = expect(token, ')', "in nested declarator"); + continue; + } + } + *tree = indirect(*tree, SYM_FN); + token = abstract_function_declarator(next, &(*tree)->children); + token = expect(token, ')', "in function declarator"); + continue; } - token = abstract_function_declarator(next, tree); - return expect(token, ')', "in function declarator"); - } - if (token->special == '[') { - token = abstract_array_declarator(token->next, tree); - return expect(token, ']', "in abstract_array_declarator"); + if (token->special == '[') { + *tree = indirect(*tree, SYM_ARRAY); + token = abstract_array_declarator(token->next, tree); + token = expect(token, ']', "in abstract_array_declarator"); + continue; + } + break; } return token; } @@ -474,8 +469,8 @@ static struct token *direct_declarator(struct token *token, struct symbol **tree static struct token *pointer(struct token *token, struct symbol **tree) { while (match_op(token,'*')) { - *tree = indirect(*tree); - token = declaration_specifiers(token->next, tree); + *tree = indirect(*tree, SYM_PTR); + token = declaration_specifiers(token->next, *tree); } return token; } @@ -489,24 +484,12 @@ static struct token *generic_declarator(struct token *token, struct symbol **tre #define abstract_declarator(token, symbol) \ generic_declarator(token, symbol, NULL) -static struct token *declarator(struct token *token, struct symbol **tree) -{ - struct token *ident = NULL; - token = pointer(token, tree); - token = direct_declarator(token, tree, generic_declarator, &ident); - if (ident) { - printf("declarator for %s:\n", show_token(ident)); - show_type(*tree); - } - return token; -} - static struct token *parameter_declaration(struct token *token, struct symbol **tree) { struct token *ident = 
NULL; *tree = alloc_symbol(SYM_TYPE); - token = declaration_specifiers(token, tree); + token = declaration_specifiers(token, *tree); token = pointer(token, tree); token = direct_declarator(token, tree, generic_declarator, &ident); if (ident) { @@ -520,7 +503,7 @@ static struct token *parameter_declaration(struct token *token, struct symbol ** static struct token *typename(struct token *token, struct symbol **tree) { *tree = alloc_symbol(SYM_TYPE); - token = declaration_specifiers(token, tree); + token = declaration_specifiers(token, *tree); return abstract_declarator(token, tree); } @@ -554,3 +537,52 @@ struct token * statement_list(struct token *token, struct statement **tree) } while (stmt); return token; } + +static struct token *parameter_type_list(struct token *token, struct symbol **tree) +{ + for (;;) { + struct symbol *sym = alloc_symbol(SYM_TYPE); + + *tree = sym; + token = parameter_declaration(token, tree); + if (!match_op(token, ',')) + break; + if (*tree) + tree = &(*tree)->next; + token = token->next; + } + return token; +} + +static struct token *abstract_function_declarator(struct token *token, struct symbol **tree) +{ + return parameter_type_list(token, tree); +} + +static struct token *declaration(struct token *token, struct symbol **tree) +{ + struct token *ident = NULL; + + *tree = alloc_symbol(SYM_TYPE); + token = declaration_specifiers(token, *tree); + token = pointer(token, tree); + token = direct_declarator(token, tree, generic_declarator, &ident); + if (ident) { + printf("named declarator %s:\n ", show_token(ident)); + show_type(*tree); + printf("\n\n"); + } + return token; +} + +struct token * translation_unit(struct token *token, struct symbol **tree) +{ + for (;;) { + token = declaration(token, tree); + if (!match_op(token, ';')) + return token; + if (*tree) + tree = &(*tree)->next; + token = token->next; + } +} @@ -55,5 +55,7 @@ struct statement { extern struct token *parse_expression(struct token *, struct expression **); extern struct 
token *statement_list(struct token *, struct statement **); extern void show_expression(struct expression *); +extern struct token *translation_unit(struct token *, struct symbol **); +extern struct token * translation_unit(struct token *, struct symbol **); #endif /* PARSE_H */ @@ -12,8 +12,8 @@ const char *modifier_string(unsigned long mod) char *p = buffer; const char *res,**ptr, *names[] = { "auto", "register", "static", "extern", + "const", "volatile", "signed", "unsigned", "char", "short", "long", "long", - "signed", "unsigned", "const", "volatile", NULL }; ptr = names; @@ -26,16 +26,23 @@ const char *modifier_string(unsigned long mod) } mod >>= 1; } - *p = 0; + *p++ = 0; + *p++ = 0; return buffer+1; } -const char *type_string(struct symbol *sym) +const char *type_string(unsigned int modifiers, struct symbol *sym) { + if (!sym) + return "<notype>"; + if (sym->token) return sym->token->ident->name; - if (sym == &int_type) + if (sym == &int_type) { + if (modifiers & (SYM_CHAR | SYM_SHORT | SYM_LONG)) + return ""; return "int"; + } if (sym == &fp_type) return "float"; if (sym == &void_type) @@ -45,37 +52,68 @@ const char *type_string(struct symbol *sym) return "unknown"; } +static void show_type_list(struct symbol *sym) +{ + while (sym) { + show_type(sym); + printf("\n\t"); + sym = sym->next; + } +} + void show_type(struct symbol *sym) { + if (!sym) { + printf("<nosym>"); + return; + } + switch (sym->type) { case SYM_PTR: + printf("%s", modifier_string(sym->modifiers)); printf("*("); show_type(sym->base_type); printf(")"); break; + case SYM_FN: + printf("%s", modifier_string(sym->modifiers)); show_type(sym->base_type); - printf("( ... )"); + + printf("(\n\t"); + show_type_list(sym->children); + printf(" )"); break; + case SYM_ARRAY: + printf("%s", modifier_string(sym->modifiers)); show_type(sym->base_type); printf("[ ... 
]"); break; + case SYM_TYPE: - printf("%s %s", modifier_string(sym->modifiers), type_string(sym->base_type)); + printf("%s %s", modifier_string(sym->modifiers), type_string(sym->modifiers, sym->base_type)); break; + default: printf("<bad type>"); } } +void show_symbol(struct symbol *sym) +{ + printf("Symbol %s:\n ", show_token(sym->token)); + show_type(sym); + printf("\n"); +} + struct symbol *alloc_symbol(int type) { struct symbol *sym = malloc(sizeof(struct symbol)); - memset(sym, 0, sizeof(*sym)); if (!sym) die("out of memory for symbol information"); + memset(sym, 0, sizeof(*sym)); sym->type = type; return sym; } @@ -92,7 +130,7 @@ void bind_symbol(struct symbol *sym, struct token *token) die("Internal error: trying to make a symbol out of a non-identifier"); ident = token->ident; sym->token = token; - sym->next = ident->symbol; + sym->next_id = ident->symbol; ident->symbol = sym; } @@ -17,12 +17,13 @@ */ struct symbol { struct token *token; /* Where this symbol was declared */ - struct symbol *next; /* Next semantic symbol that shares this identifier */ + struct symbol *next; /* Next symbol at this level */ + struct symbol *next_id; /* Next semantic symbol that shares this identifier */ int type; unsigned long size; unsigned long modifiers; struct symbol *base_type; - struct symbol *next_type; /* Next member in this struct/union? 
*/ + struct symbol *children; }; /* Modifiers */ @@ -31,15 +32,16 @@ struct symbol { #define SYM_STATIC 0x0004 #define SYM_EXTERN 0x0008 -#define SYM_CHAR 0x0010 -#define SYM_SHORT 0x0020 -#define SYM_LONG 0x0040 -#define SYM_LONGLONG 0x0080 +#define SYM_CONST 0x0010 +#define SYM_VOLATILE 0x0020 +#define SYM_SIGNED 0x0030 +#define SYM_UNSIGNED 0x0040 + +#define SYM_CHAR 0x0100 +#define SYM_SHORT 0x0200 +#define SYM_LONG 0x0400 +#define SYM_LONGLONG 0x0800 -#define SYM_SIGNED 0x0100 -#define SYM_UNSIGNED 0x0200 -#define SYM_CONST 0x0400 -#define SYM_VOLATILE 0x0800 /* Basic types */ extern struct symbol void_type, @@ -60,5 +62,6 @@ extern void init_symbols(void); extern struct symbol *alloc_symbol(int type); extern void show_type(struct symbol *); extern const char *modifier_string(unsigned long mod); +extern void show_symbol(struct symbol *); #endif /* SEMANTIC_H */ diff --git a/test-parsing.c b/test-parsing.c index c002fa41..6aaf4753 100644 --- a/test-parsing.c +++ b/test-parsing.c @@ -14,20 +14,20 @@ int main(int argc, char **argv) { int fd = open(argv[1], O_RDONLY); struct token *token; - struct statement *stmt; + struct symbol *sym; if (fd < 0) die("No such file: %s", argv[1]); init_symbols(); token = tokenize(argv[1], fd); - token = statement_list(token, &stmt); + token = translation_unit(token, &sym); if (token) warn(token, "Extra data"); - while (stmt) { - show_expression(stmt->expression); + while (sym) { + show_symbol(sym); printf("\n"); - stmt = stmt->next; + sym = sym->next; } return 0; } @@ -38,6 +38,8 @@ const char *show_token(const struct token *token) { static char buffer[256]; + if (!token) + return "<no token>"; switch (token->type) { case TOKEN_ERROR: return "syntax error"; |
