8 files changed, 93 insertions, 22 deletions
diff --git a/Makefile b/Makefile
index 618b3637..061b8b40 100644
--- a/Makefile
+++ b/Makefile
@@ -4,11 +4,11 @@ PROGRAMS=test-lexing test-parsing
 
 all: $(PROGRAMS)
 
-test-lexing: test-lexing.o tokenize.o lib.o
-	gcc -o $@ test-lexing.o tokenize.o lib.o
+test-lexing: test-lexing.o tokenize.o pre-process.o lib.o
+	gcc -o $@ test-lexing.o tokenize.o pre-process.o lib.o
 
-test-parsing: test-parsing.o parse.o tokenize.o symbol.o lib.o 
-	gcc -o $@ test-parsing.o parse.o tokenize.o symbol.o lib.o
+test-parsing: test-parsing.o parse.o tokenize.o symbol.o pre-process.o lib.o 
+	gcc -o $@ test-parsing.o parse.o tokenize.o symbol.o pre-process.o lib.o
 
 test-parsing.o: token.h parse.h
 test-lexing.o: token.h
diff --git a/lib.c b/lib.c
index bbfd3d93..89eddea8 100644
--- a/lib.c
+++ b/lib.c
@@ -36,6 +36,27 @@ struct allocator_struct {
 	unsigned int allocations, total_bytes, useful_bytes;
 };
 
+/*
+ * Free every blob chained from desc->blobs and reset the descriptor.
+ *
+ * The list pointer is cleared and the allocation statistics are zeroed
+ * before anything is freed, so the descriptor never refers to freed
+ * memory or reports bytes that are no longer held.
+ */
+void drop_all_allocations(struct allocator_struct *desc)
+{
+	struct allocation_blob *blob = desc->blobs;
+
+	desc->blobs = NULL;
+	desc->allocations = 0;
+	desc->total_bytes = 0;
+	desc->useful_bytes = 0;
+	while (blob) {
+		struct allocation_blob *next = blob->next;
+		free(blob);
+		blob = next;
+	}
+}
+
 void *allocate(struct allocator_struct *desc, unsigned int size)
 {
 	unsigned long alignment = desc->alignment;
diff --git a/parse.c b/parse.c
index 2f207316..78486c7a 100644
--- a/parse.c
+++ b/parse.c
@@ -71,16 +71,6 @@ struct statement *alloc_statement(struct token * token, int type)
 	return stmt;
 }
 
-static int match_op(struct token *token, int op)
-{
-	return token->type == TOKEN_SPECIAL && token->special == op;
-}
-
-static int match_ident(struct token *token, struct ident *id)
-{
-	return token->type == TOKEN_IDENT && token->ident == id;
-}
-
 static int match_oplist(int op, ...)
 {
 	va_list args;
diff --git a/pre-process.c b/pre-process.c
new file mode 100644
index 00000000..7a7a7db0
--- /dev/null
+++ b/pre-process.c
@@ -0,0 +1,77 @@
+/*
+ * C preprocessing pass.
+ *
+ * Works on the token list gathered by the tokenizer rather than on raw
+ * text.  At this stage a directive line is only echoed to stderr and
+ * dropped from the list.
+ *
+ * This may not be the smartest preprocessor on the planet.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <string.h>
+#include <ctype.h>
+#include <unistd.h>
+
+#include "lib.h"
+#include "token.h"
+#include "symbol.h"
+
+/*
+ * Handle one directive line; "head" is the token just before the '#'.
+ *
+ * Each token of the directive is printed to stderr, then the whole run is
+ * unlinked so that head->next becomes the first token flagged ->newline
+ * after the '#' (or the EOF token).  Returns "head".
+ */
+static struct token *preprocessor_line(struct token * head)
+{
+	struct token *cur = head->next;		/* the '#' itself */
+
+	/*
+	 * The '#' is flagged ->newline as well, so it must be printed and
+	 * stepped over before the end-of-line test is applied.
+	 */
+	for (;;) {
+		fprintf(stderr, "%s ", show_token(cur));
+		cur = cur->next;
+		if (cur->newline || eof_token(cur))
+			break;
+	}
+	fprintf(stderr, "\n");
+
+	head->next = cur;
+	return head;
+}
+
+/*
+ * Walk the list hanging off "head" and hand every '#' token that is
+ * flagged ->newline to preprocessor_line(), until EOF is reached.
+ */
+static void do_preprocess(struct token *head)
+{
+	do {
+		struct token *candidate = head->next;
+
+		if (candidate->newline && match_op(candidate, '#'))
+			head = preprocessor_line(head);	/* re-check what follows head */
+		else
+			head = candidate;
+	} while (!eof_token(head));
+}
+
+/*
+ * Preprocess the token list starting at "token" and return the head of
+ * the resulting list.
+ */
+struct token * preprocess(struct token *token)
+{
+	/* Dummy list head, so a directive at the very start can be unlinked. */
+	struct token anchor = { 0, };
+
+	anchor.next = token;
+	do_preprocess(&anchor);
+	return anchor.next;
+}
diff --git a/test-lexing.c b/test-lexing.c
index 263f2e60..72206dbd 100644
--- a/test-lexing.c
+++ b/test-lexing.c
@@ -17,6 +17,7 @@ int main(int argc, char **argv)
 		die("No such file: %s", argv[1]);
 
 	token = tokenize(argv[1], fd);
+	token = preprocess(token);
 	while (!eof_token(token)) {
 		struct token *next = token->next;
 		char separator = '\n';
diff --git a/test-parsing.c b/test-parsing.c
index 94e5e703..7fa39bd4 100644
--- a/test-parsing.c
+++ b/test-parsing.c
@@ -21,7 +21,7 @@ int main(int argc, char **argv)
 		die("No such file: %s", argv[1]);
 	init_symbols();
 	token = tokenize(argv[1], fd);
-
+	token = preprocess(token);
 	translation_unit(token, &list);
 	show_symbol_list(list);
 
diff --git a/token.h b/token.h
index f8b7637d..83ff4395 100644
--- a/token.h
+++ b/token.h
@@ -95,7 +95,8 @@ struct string {
 struct token {
 	unsigned int type:8,
 		     stream:8,
-		     pos:16;
+		     pos:15,
+		     newline:1;
 	unsigned int line;
 	struct token *next;
 	union {
@@ -127,5 +128,22 @@ extern struct token * tokenize(const char *, int);
 extern void die(const char *, ...);
 extern void warn(struct token *, const char *, ...);
 extern void show_identifier_stats(void);
+extern struct token *preprocess(struct token *);
+
+/* Nonzero iff "token" is a TOKEN_SPECIAL whose ->special equals "op". */
+static inline int match_op(struct token *token, int op)
+{
+	if (token->type != TOKEN_SPECIAL)
+		return 0;
+	return token->special == op;
+}
+
+/* Nonzero iff "token" is a TOKEN_IDENT whose ->ident is exactly "id". */
+static inline int match_ident(struct token *token, struct ident *id)
+{
+	if (token->type != TOKEN_IDENT)
+		return 0;
+	return token->ident == id;
+}
 
 #endif
diff --git a/tokenize.c b/tokenize.c
index 24a84edb..2f7fda6d 100644
--- a/tokenize.c
+++ b/tokenize.c
@@ -145,7 +145,7 @@ struct token * alloc_token(int stream, int line, int pos)
 
 #define BUFSIZE (4096)
 typedef struct {
-	int fd, line, pos, offset, size;
+	int fd, line, pos, offset, size, newline;
 	struct token **tokenlist;
 	struct token *token;
 	unsigned char buffer[BUFSIZE];
@@ -170,6 +170,7 @@ static int nextchar(action_t *action)
 	action->pos++;
 	if (c == '\n') {
 		action->line++;
+		action->newline = 1;
 		action->pos = 0;
 	}
 	return c;
@@ -196,10 +197,8 @@ static void add_token(action_t *action)
 
 static void drop_token(action_t *action)
 {
-	struct token *token = action->token;
-
+	action->newline |= action->token->newline;
 	action->token = NULL;
-	free(token);
 }
 
 static int get_base_number(unsigned int base, char **p, int next, action_t *action)
@@ -663,6 +662,7 @@ struct token * tokenize(const char *name, int fd)
 	action.tokenlist = &retval;
 	action.token = NULL;
 	action.line = 1;
+	action.newline = 1;
 	action.pos = 0;
 	action.fd = fd;
 	action.offset = 0;
@@ -670,9 +670,15 @@ struct token * tokenize(const char *name, int fd)
 
 	c = nextchar(&action);
 	while (c != EOF) {
+		if (c == '\\') {
+			c = nextchar(&action);
+			action.newline = 0;
+		}
 		if (!isspace(c)) {
-			action.token = alloc_token(stream, action.line, action.pos);
-
+			struct token *token = alloc_token(stream, action.line, action.pos);
+			token->newline = action.newline;
+			action.newline = 0;
+			action.token = token;
 			c = get_one_token(c, &action);
 			continue;
 		}