aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
author Linus Torvalds <torvalds@home.transmeta.com> 2003-03-13 18:10:50 -0700
committer Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-07 20:59:15 -0700
commit a9400ca17ec93398a39a69079f10f429d37de18b (patch)
tree 2c721188234b441b53f7f15b8ad01168d1c14249
parent 24104678556e6d8cc7ff3f775dfdd42c21c9f995 (diff)
download sparse-dev-a9400ca17ec93398a39a69079f10f429d37de18b.tar.gz
Start handling minimal semantic information, needed for types.
This adds a layer of symbol information on top of the raw tokens.
-rw-r--r-- Makefile 5
-rw-r--r-- parse.c 75
-rw-r--r-- parse.h 5
-rw-r--r-- symbol.c 33
-rw-r--r-- symbol.h 37
-rw-r--r-- test-lexing.c 2
-rw-r--r-- test-parsing.c 2
-rw-r--r-- token.h 15
-rw-r--r-- tokenize.c 81
9 files changed, 197 insertions, 58 deletions
diff --git a/Makefile b/Makefile
index e6fee791..81380e32 100644
--- a/Makefile
+++ b/Makefile
@@ -7,13 +7,14 @@ all: $(PROGRAMS)
test-lexing: test-lexing.o tokenize.o lib.o
gcc -o $@ test-lexing.o tokenize.o lib.o
-test-parsing: test-parsing.o parse.o tokenize.o lib.o
- gcc -o $@ test-parsing.o parse.o tokenize.o lib.o
+test-parsing: test-parsing.o parse.o tokenize.o symbol.o lib.o
+ gcc -o $@ test-parsing.o parse.o tokenize.o symbol.o lib.o
test-parsing.o: token.h parse.h
test-lexing.o: token.h
tokenize.o: token.h
parse.o: token.h parse.h
+symbol.o: symbol.h token.h parse.h
clean:
rm -f *.[oasi] $(PROGRAMS)
diff --git a/parse.c b/parse.c
index eaee642c..9d52ed75 100644
--- a/parse.c
+++ b/parse.c
@@ -13,45 +13,46 @@
#include "token.h"
#include "parse.h"
+#include "symbol.h"
void show_expression(struct expression *expr) /* Print expr, and recursively its operands, with printf(); a NULL expr prints nothing */
{
if (!expr)
return;
+ printf("< "); /* opening bracket, now emitted once for every expression type */
switch (expr->type) {
case EXPR_BINOP:
- printf("< ");
show_expression(expr->left);
printf(" %s ", show_special(expr->op));
show_expression(expr->right);
- printf(" >");
break;
case EXPR_PREOP:
- printf("( ");
printf(" %s ", show_special(expr->op));
show_expression(expr->unop);
- printf(" )");
break;
case EXPR_POSTOP:
- printf("( ");
show_expression(expr->unop);
printf(" %s ", show_special(expr->op));
- printf(" )");
break;
case EXPR_PRIMARY:
printf("%s", show_token(expr->token));
break;
case EXPR_DEREF:
- printf("< ");
show_expression(expr->deref);
printf("%s", show_special(expr->op));
printf("%s", show_token(expr->member));
- printf(" >");
+ break;
+ case EXPR_CAST: /* the type in parentheses, followed by the operand being cast */
+ printf("(");
+ show_expression(expr->cast_type);
+ printf(")");
+ show_expression(expr->cast_expression);
break;
default:
printf("WTF"); /* expression type not handled by any case above */
}
+ printf(" >"); /* closing bracket matching the "< " printed before the switch */
}
static struct expression *alloc_expression(struct token *token, int type)
@@ -66,11 +67,14 @@ static struct expression *alloc_expression(struct token *token, int type)
return expr;
}
+/* Return non-zero iff token is non-NULL and is the special token 'op'. */
+static int match_op(struct token *token, int op)
+{
+	if (!token || token->type != TOKEN_SPECIAL)
+		return 0;
+	return token->special == op;
+}
+
static struct token *expect(struct token *token, int op)
{
- if (!token ||
- token->value.type != TOKEN_SPECIAL ||
- token->value.special != op) {
+ if (!match_op(token, op)) {
warn(token, "Expected %s", show_special(op));
return token;
}
@@ -83,7 +87,13 @@ static struct token *primary_expression(struct token *token, struct expression *
{
struct expression *expr = NULL;
- switch (token->value.type) {
+ if (!token) {
+ warn(token, "unexpected end of file");
+ *tree = NULL;
+ return token;
+ }
+
+ switch (token->type) {
case TOKEN_IDENT:
case TOKEN_INTEGER:
case TOKEN_FP:
@@ -93,7 +103,7 @@ static struct token *primary_expression(struct token *token, struct expression *
break;
case TOKEN_SPECIAL:
- if (token->value.special == '(') {
+ if (token->special == '(') {
expr = alloc_expression(token, EXPR_PREOP);
expr->op = '(';
token = parse_expression(token->next, &expr->unop);
@@ -113,8 +123,8 @@ static struct token *postfix_expression(struct token *token, struct expression *
struct expression *expr = NULL;
token = primary_expression(token, &expr);
- while (expr && token && token->value.type == TOKEN_SPECIAL) {
- switch (token->value.special) {
+ while (expr && token && token->type == TOKEN_SPECIAL) {
+ switch (token->special) {
case '[': { /* Array dereference */
struct expression *array_expr = alloc_expression(token, EXPR_BINOP);
array_expr->op = '[';
@@ -127,7 +137,7 @@ static struct token *postfix_expression(struct token *token, struct expression *
case SPECIAL_INCREMENT: /* Post-increment */
case SPECIAL_DECREMENT: { /* Post-decrement */
struct expression *post = alloc_expression(token, EXPR_POSTOP);
- post->op = token->value.special;
+ post->op = token->special;
post->unop = expr;
expr = post;
token = token->next;
@@ -136,10 +146,10 @@ static struct token *postfix_expression(struct token *token, struct expression *
case '.': /* Structure member dereference */
case SPECIAL_DEREFERENCE: { /* Structure pointer member dereference */
struct expression *deref = alloc_expression(token, EXPR_DEREF);
- deref->op = token->value.special;
+ deref->op = token->special;
deref->deref = expr;
token = token->next;
- if (!token || token->value.type != TOKEN_IDENT) {
+ if (!token || token->type != TOKEN_IDENT) {
warn(token, "Expected member name");
break;
}
@@ -173,8 +183,33 @@ static struct token *unary_expression(struct token *token, struct expression **t
return postfix_expression(token, tree);
}
+/*
+ * Placeholder until real types exist: a type name is simply
+ * parsed as if it were an ordinary expression.
+ */
+static struct token *typename_expression(struct token *token, struct expression **tree)
+{
+	struct token *rest = parse_expression(token, tree);
+
+	return rest;
+}
+
+/*
+ * Ambiguity: a '(' opens either a cast-expression or a
+ * primary-expression.  It is treated as a cast only when the
+ * token right after it is an identifier whose current symbol
+ * is a type name; everything else is handed on to
+ * unary_expression() untouched.
+ */
 static struct token *cast_expression(struct token *token, struct expression **tree)
 {
+	struct token *type_start = match_op(token, '(') ? token->next : NULL;
+	struct symbol *sym = NULL;
+
+	if (type_start && type_start->type == TOKEN_IDENT)
+		sym = type_start->ident->symbol;
+
+	if (sym && symbol_is_typename(sym)) {
+		struct expression *cast = alloc_expression(type_start, EXPR_CAST);
+		struct token *rest;
+
+		rest = typename_expression(type_start, &cast->cast_type);
+		rest = expect(rest, ')');
+		/* The operand may itself be another cast. */
+		rest = cast_expression(rest, &cast->cast_expression);
+		*tree = cast;
+		return rest;
+	}
+
 	return unary_expression(token, tree);
 }
@@ -186,9 +221,9 @@ static struct token *lr_binop_expression(struct token *token, struct expression
struct token * next = inner(token, &left);
if (left) {
- while (next && next->value.type == TOKEN_SPECIAL) {
+ while (next && next->type == TOKEN_SPECIAL) {
struct expression *top, *right = NULL;
- int op = next->value.special;
+ int op = next->special;
va_list args;
va_start(args, inner);
diff --git a/parse.h b/parse.h
index 27421184..c8cf6b48 100644
--- a/parse.h
+++ b/parse.h
@@ -7,6 +7,7 @@ enum expression_type {
EXPR_DEREF,
EXPR_PREOP,
EXPR_POSTOP,
+ EXPR_CAST,
};
struct expression {
@@ -21,6 +22,10 @@ struct expression {
struct expression *deref;
struct token *member;
};
+ struct cast_arg {
+ struct expression *cast_type;
+ struct expression *cast_expression;
+ };
};
};
diff --git a/symbol.c b/symbol.c
new file mode 100644
index 00000000..ed09a167
--- /dev/null
+++ b/symbol.c
@@ -0,0 +1,33 @@
+#include <stdlib.h>
+#include "token.h"
+#include "symbol.h"
+
+/*
+ * Allocate a symbol of the given type for an identifier token and
+ * link it in at the head of that identifier's symbol list.
+ *
+ * Calls die() if the token is not an identifier or if the
+ * allocation fails.
+ */
+struct symbol *alloc_symbol(struct token *token, int type)
+{
+	struct symbol *sym = malloc(sizeof *sym);
+	struct ident *name;
+
+	if (token->type != TOKEN_IDENT)
+		die("Internal error: trying to make a symbol out of a non-identifier");
+	if (!sym)
+		die("out of memory for symbol information");
+
+	name = token->ident;
+	sym->token = token;
+	sym->type = type;
+	/* Push onto the front of the identifier's symbol chain. */
+	sym->next = name->symbol;
+	name->symbol = sym;
+	return sym;
+}
+
+/*
+ * Make a built-in identifier token for 'name' in the given stream
+ * and attach a new symbol of the given type to it.
+ */
+struct symbol *create_symbol(int stream, const char *name, int type)
+{
+	struct token *token = built_in_token(stream, name);
+
+	return alloc_symbol(token, type);
+}
+
+/*
+ * Register the built-in symbols.  They are created in their own
+ * input stream named "builtin"; for now the only one is "int",
+ * registered as SYM_TYPEDEF so that symbol_is_typename() accepts it.
+ */
+void init_symbols(void)
+{
+	int stream = init_stream("builtin");
+
+	/* The returned symbol is not needed here: alloc_symbol() already
+	 * hooked it onto the identifier. */
+	create_symbol(stream, "int", SYM_TYPEDEF);
+}
diff --git a/symbol.h b/symbol.h
new file mode 100644
index 00000000..cb3de36e
--- /dev/null
+++ b/symbol.h
@@ -0,0 +1,37 @@
+#ifndef SEMANTIC_H
+#define SEMANTIC_H
+
+#include "token.h"
+
+/*
+ * An identifier with semantic meaning is a "symbol".
+ *
+ * There's a 1:n relationship: each symbol is always
+ * associated with one identifier, while each identifier
+ * can have one or more semantic meanings due to C scope
+ * rules.
+ *
+ * The progression is symbol -> token -> identifier. The
+ * token contains the information on where the symbol was
+ * declared.
+ *
+ * Symbols that share an identifier are chained through
+ * 'next', starting at the identifier's 'symbol' pointer;
+ * alloc_symbol() links each new symbol in at the head of
+ * that chain.
+ */
+struct symbol {
+ struct token *token; /* Where this symbol was declared */
+ struct symbol *next; /* Next semantic symbol that shares this identifier */
+ int type; /* One of enum symbol_types */
+};
+
+enum symbol_types { /* Values stored in symbol->type; the order matters, see symbol_is_typename() */
+ SYM_NONE = 0, /* regular variable */
+ SYM_MEMBER, /* structure member */
+ SYM_TYPEDEF, /* typedef */
+ SYM_SPECIFIER, /* specifier */
+ SYM_QUALIFIER, /* type qualifier */
+};
+
+#define symbol_is_typename(sym) ((sym)->type >= SYM_TYPEDEF) /* true for SYM_TYPEDEF and every kind listed after it */
+
+void init_symbols(void);
+
+#endif /* SEMANTIC_H */
+
diff --git a/test-lexing.c b/test-lexing.c
index 9db756ab..007f404c 100644
--- a/test-lexing.c
+++ b/test-lexing.c
@@ -5,6 +5,7 @@
#include <ctype.h>
#include <unistd.h>
#include <fcntl.h>
+
#include "token.h"
void callback(struct token *token)
@@ -20,6 +21,7 @@ int main(int argc, char **argv)
if (fd < 0)
die("No such file: %s", argv[1]);
+
token = tokenize(argv[1], fd);
line = token->line;
while (token) {
diff --git a/test-parsing.c b/test-parsing.c
index 6a16403d..4a994851 100644
--- a/test-parsing.c
+++ b/test-parsing.c
@@ -8,6 +8,7 @@
#include "token.h"
#include "parse.h"
+#include "symbol.h"
int main(int argc, char **argv)
{
@@ -17,6 +18,7 @@ int main(int argc, char **argv)
if (fd < 0)
die("No such file: %s", argv[1]);
+ init_symbols();
token = tokenize(argv[1], fd);
token = parse_expression(token, &expr);
if (token)
diff --git a/token.h b/token.h
index e3ebef0c..6fb072f8 100644
--- a/token.h
+++ b/token.h
@@ -85,7 +85,11 @@ struct string {
char data[];
};
-struct value {
+struct token {
+ unsigned int line;
+ unsigned int pos:16,stream:8,len:8;
+ struct token *next;
+
enum token_type type;
union {
double fpval;
@@ -96,13 +100,8 @@ struct value {
};
};
-struct token {
- unsigned int line;
- unsigned int pos:16,stream:8,len:8;
- struct value value;
- struct token *next;
-};
-
+extern int init_stream(const char *);
+extern struct token *built_in_token(int, const char *);
extern const char *show_special(int op);
extern const char *show_token(const struct token *token);
extern struct token * tokenize(const char *, int);
diff --git a/tokenize.c b/tokenize.c
index bc581681..5ce19969 100644
--- a/tokenize.c
+++ b/tokenize.c
@@ -37,14 +37,13 @@ const char *show_special(int val)
const char *show_token(const struct token *token)
{
static char buffer[256];
- const struct value *value = &token->value;
- switch (value->type) {
+ switch (token->type) {
case TOKEN_ERROR:
return "syntax error";
case TOKEN_IDENT: {
- struct ident *ident = value->ident;
+ struct ident *ident = token->ident;
sprintf(buffer, "%.*s", ident->len, ident->name);
return buffer;
}
@@ -52,7 +51,7 @@ const char *show_token(const struct token *token)
case TOKEN_STRING: {
char *ptr;
int i;
- struct string *string = value->string;
+ struct string *string = token->string;
ptr = buffer;
*ptr++ = '"';
@@ -88,24 +87,24 @@ const char *show_token(const struct token *token)
case TOKEN_INTEGER: {
char *ptr;
- ptr = buffer + sprintf(buffer, "%llu", value->intval);
+ ptr = buffer + sprintf(buffer, "%llu", token->intval);
return buffer;
}
case TOKEN_FP: {
- sprintf(buffer, "%f", value->fpval);
+ sprintf(buffer, "%f", token->fpval);
return buffer;
}
case TOKEN_SPECIAL:
- return show_special(value->special);
+ return show_special(token->special);
default:
return "WTF???";
}
}
-static int init_stream(const char *name)
+int init_stream(const char *name)
{
int stream = input_stream_nr;
@@ -121,6 +120,19 @@ static int init_stream(const char *name)
return stream;
}
+/*
+ * Allocate a zero-filled token and record where it came from
+ * (stream, line and position).  Calls die() if the allocation
+ * fails.
+ */
+struct token * alloc_token(int stream, int line, int pos)
+{
+	struct token *tok = calloc(1, sizeof *tok);
+
+	if (!tok)
+		die("Out of memory for token");
+
+	tok->stream = stream;
+	tok->line = line;
+	tok->pos = pos;
+	return tok;
+}
+
#define BUFSIZE (4096)
typedef struct {
int fd, line, pos, offset, size;
@@ -175,8 +187,8 @@ static int do_integer(unsigned long long value, int next, action_t *action)
{
struct token *token = action->token;
- token->value.type = TOKEN_INTEGER;
- token->value.intval = value;
+ token->type = TOKEN_INTEGER;
+ token->intval = value;
add_token(action);
return next;
}
@@ -336,8 +348,8 @@ static int get_char_token(int next, action_t *action)
}
token = action->token;
- token->value.type = TOKEN_INTEGER;
- token->value.intval = value & 0xff;
+ token->type = TOKEN_INTEGER;
+ token->intval = value & 0xff;
add_token(action);
return nextchar(action);
@@ -375,8 +387,8 @@ static int get_string_token(int next, action_t *action)
/* Pass it on.. */
token = action->token;
- token->value.type = TOKEN_STRING;
- token->value.string = string;
+ token->type = TOKEN_STRING;
+ token->string = string;
add_token(action);
return next;
@@ -461,8 +473,8 @@ static int get_one_special(int c, action_t *action)
/* Pass it on.. */
token = action->token;
- token->value.type = TOKEN_SPECIAL;
- token->value.special = value;
+ token->type = TOKEN_SPECIAL;
+ token->special = value;
add_token(action);
return next;
}
@@ -534,6 +546,28 @@ static struct ident *create_hashed_ident(const char *name, int len, unsigned lon
#define ident_hash_add(oldhash,c) ((oldhash)*11 + (c))
#define ident_hash_end(hash) (hash)
+/*
+ * Create an identifier token for a built-in name.
+ *
+ * The name is hashed with the ident_hash_* macros and handed to
+ * create_hashed_ident(); the resulting token belongs to 'stream'
+ * and has line 0, position 0.
+ *
+ * 'name' must be a non-empty NUL-terminated string; anything else
+ * is an internal error and calls die().
+ */
+struct token *built_in_token(int stream, const char *name)
+{
+	const unsigned char *p = (const unsigned char *)name;
+	unsigned long hash;
+	struct token *token;
+	unsigned int c;
+	int len = 1;
+
+	/*
+	 * The hash is seeded from the first character, which is consumed
+	 * unconditionally.  With an empty name that would swallow the
+	 * terminator and the loop below would scan past the end of the
+	 * string, so reject it up front.
+	 */
+	if (!name || !*name)
+		die("Internal error: built-in token needs a non-empty name");
+
+	hash = ident_hash_init(*p++);
+	for (c = *p++; c; c = *p++) {
+		hash = ident_hash_add(hash, c);
+		len++;
+	}
+	hash = ident_hash_end(hash);
+
+	token = alloc_token(stream, 0, 0);
+	token->type = TOKEN_IDENT;
+	token->ident = create_hashed_ident(name, len, hash);
+	return token;
+}
+
static int get_one_identifier(int c, action_t *action)
{
struct token *token;
@@ -567,8 +601,8 @@ static int get_one_identifier(int c, action_t *action)
/* Pass it on.. */
token = action->token;
- token->value.type = TOKEN_IDENT;
- token->value.ident = ident;
+ token->type = TOKEN_IDENT;
+ token->ident = ident;
add_token(action);
return next;
}
@@ -606,16 +640,7 @@ struct token * tokenize(const char *name, int fd)
c = nextchar(&action);
while (c != EOF) {
if (!isspace(c)) {
- struct token *token = malloc(sizeof(struct token));
- if (!token)
- die("Out of memory for token");
-
- memset(token, 0, sizeof(struct token));
- token->line = action.line;
- token->pos = action.pos;
- token->stream = stream;
-
- action.token = token;
+ action.token = alloc_token(stream, action.line, action.pos);
c = get_one_token(c, &action);
continue;