about summary refs log tree commit diff stats homepage
diff options
authorLinus Torvalds <torvalds@penguin.transmeta.com>2003-03-14 17:08:12 -0700
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-07 20:59:16 -0700
commit2d76ef523eb4606a60b7e95fd67b4c6d302a2b16 (patch)
tree071211260972d1010dad6e64446b224dea96922c
parentffa4ddda950185bf313654e903207a3fc2d5f261 (diff)
downloadsparse-dev-2d76ef523eb4606a60b7e95fd67b4c6d302a2b16.tar.gz
Add more type parsing: function and array declarators, function
parameter lists etc. It can now parse things like const volatile int hello(const int (*argc)(void), const char *const* argv); apparently correctly.
-rw-r--r-- parse.c 162
-rw-r--r-- parse.h 2
-rw-r--r-- symbol.c 54
-rw-r--r-- symbol.h 23
-rw-r--r-- test-parsing.c 10
-rw-r--r-- tokenize.c 2
6 files changed, 165 insertions, 88 deletions
diff --git a/parse.c b/parse.c
index b86e8c9f..c3df47cd 100644
--- a/parse.c
+++ b/parse.c
@@ -344,10 +344,8 @@ struct statement *alloc_statement(struct token * token, int type)
return stmt;
}
-static struct token *declaration_specifiers(struct token *token, struct symbol **tree)
+static struct token *declaration_specifiers(struct token *token, struct symbol *sym)
{
- struct symbol *sym = *tree;
-
for ( ; token; token = token->next) {
struct ident *ident;
struct symbol *s;
@@ -390,33 +388,22 @@ static struct token *declaration_specifiers(struct token *token, struct symbol *
sym->modifiers = old | mod | extra;
}
}
-
- if (!sym->base_type)
- sym->base_type = &int_type;
- *tree = sym;
return token;
}
-static struct symbol *indirect(struct symbol *type)
+static void force_default_type(struct symbol *sym)
{
- struct symbol *sym = alloc_symbol(SYM_PTR);
-
- sym->base_type = type;
- return sym;
+ if (!sym->base_type)
+ sym->base_type = &int_type;
}
-static struct token *parameter_declaration(struct token *token, struct symbol **tree);
-static struct token *parameter_type_list(struct token *token, struct symbol **tree)
+static struct symbol *indirect(struct symbol *parent, int type)
{
- return parameter_declaration(token, tree);
-}
+ struct symbol *sym = alloc_symbol(type);
-static struct token *abstract_function_declarator(struct token *token, struct symbol **tree)
-{
- struct symbol *sym = alloc_symbol(SYM_FN);
- sym->base_type = *tree;
- *tree = sym;
- return parameter_type_list(token, &sym->next_type);
+ force_default_type(parent);
+ sym->base_type = parent;
+ return sym;
}
static int constant_value(struct expression *expr)
@@ -427,46 +414,54 @@ static int constant_value(struct expression *expr)
static struct token *abstract_array_declarator(struct token *token, struct symbol **tree)
{
struct expression *expr;
- struct symbol *sym = alloc_symbol(SYM_ARRAY);
- sym->base_type = *tree;
- *tree = sym;
+ struct symbol *sym = *tree;
token = parse_expression(token, &expr);
sym->size = constant_value(expr);
return token;
}
+static struct token *abstract_function_declarator(struct token *token, struct symbol **tree);
+
+/*
+ * Parse a direct declarator: an optional identifier (stored in *ident
+ * when ident is non-NULL) followed by any number of '(...)' and '[...]'
+ * suffixes. Each function or array suffix wraps *tree in a new SYM_FN or
+ * SYM_ARRAY node; a parenthesized grouping recurses through 'declarator'.
+ * Returns the first token that is not part of the declarator.
+ */
static struct token *direct_declarator(struct token *token, struct symbol **tree,
struct token *(*declarator)(struct token *, struct symbol **, struct token **),
struct token **ident)
{
- if (ident && token->type == TOKEN_IDENT) {
+ if (ident && token && token->type == TOKEN_IDENT) {
*ident = token;
- return token->next;
+ token = token->next;
}
-
- if (token->type != TOKEN_SPECIAL)
- return token;
- /*
- * This can be either a function or a grouping!
- * A grouping must start with '*', '[' or '('..
- */
- if (token->special == '(') {
- struct token *next = token->next;
- if (next && next->type == TOKEN_SPECIAL) {
- if (next->special == '*' ||
- next->special == '(' ||
- next->special == '[') {
- token = declarator(next,tree, ident);
- return expect(token, ')', "in nested declarator");
- }
+ for (;;) {
+ if (!token || token->type != TOKEN_SPECIAL)
+ return token;
+
+ /*
+ * This can be either a function or a grouping!
+ * A grouping must start with '*', '[' or '('..
+ *
+ * Once the identifier has been seen, a '(' can only open
+ * a parameter list. Test *ident, not ident: the pointer
+ * itself is non-NULL for every named declarator, so testing
+ * it would disable groupings like "(*argc)" entirely.
+ */
+ if (token->special == '(') {
+ struct token *next = token->next;
+ int named = ident && *ident;
+ if (!named && next && next->type == TOKEN_SPECIAL) {
+ if (next->special == '*' ||
+ next->special == '(' ||
+ next->special == '[') {
+ token = declarator(next,tree, ident);
+ token = expect(token, ')', "in nested declarator");
+ continue;
+ }
+ }
+ *tree = indirect(*tree, SYM_FN);
+ token = abstract_function_declarator(next, &(*tree)->children);
+ token = expect(token, ')', "in function declarator");
+ continue;
}
- token = abstract_function_declarator(next, tree);
- return expect(token, ')', "in function declarator");
- }
- if (token->special == '[') {
- token = abstract_array_declarator(token->next, tree);
- return expect(token, ']', "in abstract_array_declarator");
+ if (token->special == '[') {
+ *tree = indirect(*tree, SYM_ARRAY);
+ token = abstract_array_declarator(token->next, tree);
+ token = expect(token, ']', "in abstract_array_declarator");
+ continue;
+ }
+ break;
}
return token;
}
@@ -474,8 +469,8 @@ static struct token *direct_declarator(struct token *token, struct symbol **tree
static struct token *pointer(struct token *token, struct symbol **tree)
{
while (match_op(token,'*')) {
- *tree = indirect(*tree);
- token = declaration_specifiers(token->next, tree);
+ *tree = indirect(*tree, SYM_PTR);
+ token = declaration_specifiers(token->next, *tree);
}
return token;
}
@@ -489,24 +484,12 @@ static struct token *generic_declarator(struct token *token, struct symbol **tre
#define abstract_declarator(token, symbol) \
generic_declarator(token, symbol, NULL)
-static struct token *declarator(struct token *token, struct symbol **tree)
-{
- struct token *ident = NULL;
- token = pointer(token, tree);
- token = direct_declarator(token, tree, generic_declarator, &ident);
- if (ident) {
- printf("declarator for %s:\n", show_token(ident));
- show_type(*tree);
- }
- return token;
-}
-
static struct token *parameter_declaration(struct token *token, struct symbol **tree)
{
struct token *ident = NULL;
*tree = alloc_symbol(SYM_TYPE);
- token = declaration_specifiers(token, tree);
+ token = declaration_specifiers(token, *tree);
token = pointer(token, tree);
token = direct_declarator(token, tree, generic_declarator, &ident);
if (ident) {
@@ -520,7 +503,7 @@ static struct token *parameter_declaration(struct token *token, struct symbol **
static struct token *typename(struct token *token, struct symbol **tree)
{
*tree = alloc_symbol(SYM_TYPE);
- token = declaration_specifiers(token, tree);
+ token = declaration_specifiers(token, *tree);
return abstract_declarator(token, tree);
}
@@ -554,3 +537,52 @@ struct token * statement_list(struct token *token, struct statement **tree)
} while (stmt);
return token;
}
+
+/*
+ * Parse a comma-separated list of parameter declarations.
+ *
+ * Each parameter's type is stored through 'tree'; successive parameters
+ * are chained through the ->next field of the previous one. Returns the
+ * first token after the last parameter (the caller checks for ')').
+ */
+static struct token *parameter_type_list(struct token *token, struct symbol **tree)
+{
+	for (;;) {
+		/*
+		 * parameter_declaration() allocates the symbol itself and
+		 * stores it in *tree, so nothing is pre-allocated here: a
+		 * symbol created at this point would be overwritten and lost.
+		 */
+		token = parameter_declaration(token, tree);
+		if (!match_op(token, ','))
+			break;
+		if (*tree)
+			tree = &(*tree)->next;
+		token = token->next;
+	}
+	return token;
+}
+
+/*
+ * Parse the inside of a function declarator's parentheses. Currently this
+ * is exactly a parameter type list; the result is stored through 'tree'.
+ */
+static struct token *abstract_function_declarator(struct token *token, struct symbol **tree)
+{
+	struct token *rest = parameter_type_list(token, tree);
+
+	return rest;
+}
+
+/*
+ * Parse one declaration: declaration specifiers, an optional pointer
+ * prefix, then the direct declarator. The resulting type is stored in
+ * *tree. When the declarator named an identifier, the name and its type
+ * are printed. Returns the first unconsumed token.
+ */
+static struct token *declaration(struct token *token, struct symbol **tree)
+{
+	struct token *name = NULL;
+
+	*tree = alloc_symbol(SYM_TYPE);
+	token = declaration_specifiers(token, *tree);
+	token = pointer(token, tree);
+	token = direct_declarator(token, tree, generic_declarator, &name);
+
+	/* Nothing to report for a declarator without an identifier. */
+	if (!name)
+		return token;
+
+	printf("named declarator %s:\n ", show_token(name));
+	show_type(*tree);
+	printf("\n\n");
+	return token;
+}
+
+/*
+ * Parse a sequence of ';'-terminated declarations.
+ *
+ * The parsed symbols are chained through ->next starting at *tree.
+ * Returns NULL when all input was consumed, otherwise the first token
+ * that could not be parsed as part of a declaration.
+ */
+struct token * translation_unit(struct token *token, struct symbol **tree)
+{
+	for (;;) {
+		/*
+		 * End of input: terminate the list here. Calling
+		 * declaration() with no token would still allocate a
+		 * symbol and append an empty entry to the list.
+		 */
+		if (!token) {
+			*tree = NULL;
+			return NULL;
+		}
+		token = declaration(token, tree);
+		if (!match_op(token, ';'))
+			return token;
+		if (*tree)
+			tree = &(*tree)->next;
+		token = token->next;
+	}
+}
diff --git a/parse.h b/parse.h
index c3c7f0fc..302d8ff8 100644
--- a/parse.h
+++ b/parse.h
@@ -55,5 +55,7 @@ struct statement {
extern struct token *parse_expression(struct token *, struct expression **);
extern struct token *statement_list(struct token *, struct statement **);
extern void show_expression(struct expression *);
+/* Parse a sequence of ';'-terminated declarations (defined in parse.c). */
+extern struct token *translation_unit(struct token *, struct symbol **);
#endif /* PARSE_H */
diff --git a/symbol.c b/symbol.c
index aeb2ed6e..f2ee822d 100644
--- a/symbol.c
+++ b/symbol.c
@@ -12,8 +12,8 @@ const char *modifier_string(unsigned long mod)
char *p = buffer;
const char *res,**ptr, *names[] = {
"auto", "register", "static", "extern",
+ "const", "volatile", "signed", "unsigned",
"char", "short", "long", "long",
- "signed", "unsigned", "const", "volatile",
NULL
};
ptr = names;
@@ -26,16 +26,23 @@ const char *modifier_string(unsigned long mod)
}
mod >>= 1;
}
- *p = 0;
+ *p++ = 0;
+ *p++ = 0;
return buffer+1;
}
-const char *type_string(struct symbol *sym)
+const char *type_string(unsigned int modifiers, struct symbol *sym)
{
+ if (!sym)
+ return "<notype>";
+
if (sym->token)
return sym->token->ident->name;
- if (sym == &int_type)
+ if (sym == &int_type) {
+ if (modifiers & (SYM_CHAR | SYM_SHORT | SYM_LONG))
+ return "";
return "int";
+ }
if (sym == &fp_type)
return "float";
if (sym == &void_type)
@@ -45,37 +52,68 @@ const char *type_string(struct symbol *sym)
return "unknown";
}
+/*
+ * Print every type on the ->next chain starting at 'sym', following
+ * each entry with a newline and a tab.
+ */
+static void show_type_list(struct symbol *sym)
+{
+	struct symbol *entry;
+
+	for (entry = sym; entry; entry = entry->next) {
+		show_type(entry);
+		printf("\n\t");
+	}
+}
+
void show_type(struct symbol *sym)
{
+ if (!sym) {
+ printf("<nosym>");
+ return;
+ }
+
switch (sym->type) {
case SYM_PTR:
+ printf("%s", modifier_string(sym->modifiers));
printf("*(");
show_type(sym->base_type);
printf(")");
break;
+
case SYM_FN:
+ printf("%s", modifier_string(sym->modifiers));
show_type(sym->base_type);
- printf("( ... )");
+
+ printf("(\n\t");
+ show_type_list(sym->children);
+ printf(" )");
break;
+
case SYM_ARRAY:
+ printf("%s", modifier_string(sym->modifiers));
show_type(sym->base_type);
printf("[ ... ]");
break;
+
case SYM_TYPE:
- printf("%s %s", modifier_string(sym->modifiers), type_string(sym->base_type));
+ printf("%s %s", modifier_string(sym->modifiers), type_string(sym->modifiers, sym->base_type));
break;
+
default:
printf("<bad type>");
}
}
+/*
+ * Print a symbol: a "Symbol <token>:" header line, then its type,
+ * then a newline.
+ */
+void show_symbol(struct symbol *sym)
+{
+	const char *name = show_token(sym->token);
+
+	printf("Symbol %s:\n ", name);
+	show_type(sym);
+	printf("\n");
+}
+
struct symbol *alloc_symbol(int type)
{
struct symbol *sym = malloc(sizeof(struct symbol));
- memset(sym, 0, sizeof(*sym));
if (!sym)
die("out of memory for symbol information");
+ memset(sym, 0, sizeof(*sym));
sym->type = type;
return sym;
}
@@ -92,7 +130,7 @@ void bind_symbol(struct symbol *sym, struct token *token)
die("Internal error: trying to make a symbol out of a non-identifier");
ident = token->ident;
sym->token = token;
- sym->next = ident->symbol;
+ sym->next_id = ident->symbol;
ident->symbol = sym;
}
diff --git a/symbol.h b/symbol.h
index f393c0e2..6ad85f4a 100644
--- a/symbol.h
+++ b/symbol.h
@@ -17,12 +17,13 @@
*/
struct symbol {
struct token *token; /* Where this symbol was declared */
- struct symbol *next; /* Next semantic symbol that shares this identifier */
+ struct symbol *next; /* Next symbol at this level */
+ struct symbol *next_id; /* Next semantic symbol that shares this identifier */
int type;
unsigned long size;
unsigned long modifiers;
struct symbol *base_type;
- struct symbol *next_type; /* Next member in this struct/union? */
+ struct symbol *children;
};
/* Modifiers */
@@ -31,15 +32,16 @@ struct symbol {
#define SYM_STATIC 0x0004
#define SYM_EXTERN 0x0008
-#define SYM_CHAR 0x0010
-#define SYM_SHORT 0x0020
-#define SYM_LONG 0x0040
-#define SYM_LONGLONG 0x0080
+#define SYM_CONST 0x0010
+#define SYM_VOLATILE 0x0020
+#define SYM_SIGNED 0x0040 /* own bit: 0x0030 would alias SYM_CONST|SYM_VOLATILE */
+#define SYM_UNSIGNED 0x0080 /* bit order must match the names[] table in modifier_string() */
+
+#define SYM_CHAR 0x0100
+#define SYM_SHORT 0x0200
+#define SYM_LONG 0x0400
+#define SYM_LONGLONG 0x0800
-#define SYM_SIGNED 0x0100
-#define SYM_UNSIGNED 0x0200
-#define SYM_CONST 0x0400
-#define SYM_VOLATILE 0x0800
/* Basic types */
extern struct symbol void_type,
@@ -60,5 +62,6 @@ extern void init_symbols(void);
extern struct symbol *alloc_symbol(int type);
extern void show_type(struct symbol *);
extern const char *modifier_string(unsigned long mod);
+extern void show_symbol(struct symbol *);
#endif /* SEMANTIC_H */
diff --git a/test-parsing.c b/test-parsing.c
index c002fa41..6aaf4753 100644
--- a/test-parsing.c
+++ b/test-parsing.c
@@ -14,20 +14,20 @@ int main(int argc, char **argv)
{
int fd = open(argv[1], O_RDONLY);
struct token *token;
- struct statement *stmt;
+ struct symbol *sym;
if (fd < 0)
die("No such file: %s", argv[1]);
init_symbols();
token = tokenize(argv[1], fd);
- token = statement_list(token, &stmt);
+ token = translation_unit(token, &sym);
if (token)
warn(token, "Extra data");
- while (stmt) {
- show_expression(stmt->expression);
+ while (sym) {
+ show_symbol(sym);
printf("\n");
- stmt = stmt->next;
+ sym = sym->next;
}
return 0;
}
diff --git a/tokenize.c b/tokenize.c
index 5ce19969..b2d58db3 100644
--- a/tokenize.c
+++ b/tokenize.c
@@ -38,6 +38,8 @@ const char *show_token(const struct token *token)
{
static char buffer[256];
+ if (!token)
+ return "<no token>";
switch (token->type) {
case TOKEN_ERROR:
return "syntax error";