18 files changed, 785 insertions, 422 deletions
diff --git a/Makefile b/Makefile
index 84e5df24..b195528e 100644
--- a/Makefile
+++ b/Makefile
@@ -93,7 +93,7 @@ LIB_H=    token.h parse.h lib.h symbol.h scope.h expression.h target.h \
 
 LIB_OBJS= target.o parse.o tokenize.o pre-process.o symbol.o lib.o scope.o \
 	  expression.o show-parse.o evaluate.o expand.o inline.o linearize.o \
-	  sort.o allocate.o compat-$(OS).o ptrlist.o \
+	  char.o sort.o allocate.o compat-$(OS).o ptrlist.o \
 	  flow.o cse.o simplify.o memops.o liveness.o storage.o unssa.o dissect.o
 
 LIB_FILE= libsparse.a
diff --git a/char.c b/char.c
new file mode 100644
index 00000000..92674565
--- /dev/null
+++ b/char.c
@@ -0,0 +1,131 @@
+#include <string.h>
+#include "target.h"
+#include "lib.h"
+#include "allocate.h"
+#include "token.h"
+#include "expression.h"
+/* Decode one character or backslash escape from [p, end) into *val (escape values are masked to 'bits' bits); returns the first unconsumed byte. */
+static const char *parse_escape(const char *p, unsigned *val, const char *end, int bits, struct position pos)
+{
+	unsigned c = *p++;
+	unsigned d;
+	if (c != '\\') {
+		*val = c;
+		return p;
+	}
+	/* Backslash seen: the next byte selects the kind of escape. */
+	c = *p++;
+	switch (c) {
+	case 'a': c = '\a'; break;
+	case 'b': c = '\b'; break;
+	case 't': c = '\t'; break;
+	case 'n': c = '\n'; break;
+	case 'v': c = '\v'; break;
+	case 'f': c = '\f'; break;
+	case 'r': c = '\r'; break;
+	case 'e': c = '\e'; break;
+	case 'x': {
+		unsigned mask = -(1U << (bits - 4));	/* bits one more hex digit would push out of range */
+		for (c = 0; p < end; c = (c << 4) + d, p++) {
+			d = hexval(*p);
+			if (d >= 16)	/* not a hex digit (0..15): leave it unconsumed for the caller */
+				break;
+			if (c & mask) {
+				warning(pos,
+					"hex escape sequence out of range");
+				mask = 0;	/* warn only once per escape */
+			}
+		}
+		break;
+	}
+	case '0'...'7': {
+		if (p + 2 < end)
+			end = p + 2;	/* at most three octal digits, the first is already in c */
+		c -= '0';
+		for (; p < end && (d = *p - '0') < 8; p++)	/* stop AT the first non-octal byte, do not eat it */
+			c = (c << 3) + d;
+		if ((c & 0400) && bits < 9)
+			warning(pos,
+				"octal escape sequence out of range");
+		break;
+	}
+	default:	/* everything else is left as is */
+		break;
+	}
+	*val = c & ~((~0U << (bits - 1)) << 1);
+	return p;
+}
+/* Evaluate a character-constant token into *val; warns when the token holds more than one character. */
+void get_char_constant(struct token *token, unsigned long long *val)
+{
+	const char *p = token->embedded, *end;
+	unsigned v;
+	int type = token_type(token);
+	switch (type) {
+	case TOKEN_CHAR:
+	case TOKEN_WIDE_CHAR:
+		p = token->string->data;
+		end = p + token->string->length - 1;	/* length includes the NUL, as the 'length - 1' in show_token/get_string_constant shows */
+		break;
+	case TOKEN_CHAR + 1 ... TOKEN_CHAR + 4:
+		end = p + type - TOKEN_CHAR;	/* token type encodes the number of embedded bytes */
+		break;
+	default:
+		end = p + type - TOKEN_WIDE_CHAR;
+	}
+	p = parse_escape(p, &v, end,
+			type < TOKEN_WIDE_CHAR ? bits_in_char : 32, token->pos);
+	if (p != end)	/* bytes left over after the first character */
+		warning(token->pos,
+			"multi-character character constant");
+	*val = v;
+}
+/* Decode and concatenate a run of adjacent string-literal tokens into expr->string; returns the token after the run. */
+struct token *get_string_constant(struct token *token, struct expression *expr)
+{
+	static char buffer[MAX_STRING];
+	struct string *string = token->string;
+	struct token *stop = token->next;
+	int is_wide = token_type(token) == TOKEN_WIDE_STRING;
+	int len = 0;
+	int bits;
+
+	/* Advance 'stop' past every directly following string literal. */
+	for (;;) {
+		int type = token_type(stop);
+		if (type != TOKEN_STRING && type != TOKEN_WIDE_STRING)
+			break;
+		if (type == TOKEN_WIDE_STRING)
+			is_wide = 1;	/* one wide piece makes the whole result wide */
+		stop = stop->next;
+	}
+
+	bits = is_wide ? 32 : bits_in_char;
+	for (; token != stop; token = token->next) {
+		const char *cur = token->string->data;
+		const char *limit = cur + token->string->length - 1;
+		unsigned ch;
+
+		/* len keeps counting past MAX_STRING so the overflow can be reported below */
+		while (cur < limit) {
+			cur = parse_escape(cur, &ch, limit, bits, token->pos);
+			if (len < MAX_STRING)
+				buffer[len] = ch;
+			len++;
+		}
+	}
+	if (len > MAX_STRING) {
+		warning(token->pos, "trying to concatenate %d-character string (%d bytes max)", len, MAX_STRING);
+		len = MAX_STRING;
+	}
+
+	/* Reuse the first token's string storage unless the result needs more room. */
+	if (len >= string->length)
+		string = __alloc_string(len+1);
+	string->length = len+1;
+	memcpy(string->data, buffer, len);
+	string->data[len] = '\0';
+	expr->string = string;
+	expr->wide = is_wide;
+	return token;
+}
diff --git a/char.h b/char.h
new file mode 100644
index 00000000..54be6b74
--- /dev/null
+++ b/char.h
@@ -0,0 +1,2 @@
+extern void get_char_constant(struct token *, unsigned long long *);	/* stores the value of a character-constant token */
+extern struct token *get_string_constant(struct token *, struct expression *);	/* fills in the expression's string/wide fields; returns the token after the literal(s) */
diff --git a/evaluate.c b/evaluate.c
index 0987a5e5..d09f271a 100644
--- a/evaluate.c
+++ b/evaluate.c
@@ -1696,16 +1696,20 @@ static struct symbol *evaluate_postop(struct expression *expr)
 {
 	struct expression *op = expr->unop;
 	struct symbol *ctype = op->ctype;
-	int class = classify_type(op->ctype, &ctype);
+	int class = classify_type(ctype, &ctype);
 	int multiply = 0;
 
+	if (!class || class & TYPE_COMPOUND) {
+		expression_error(expr, "need scalar for ++/--");
+		return NULL;
+	}
 	if (!lvalue_expression(expr->unop)) {
 		expression_error(expr, "need lvalue expression for ++/--");
 		return NULL;
 	}
 
 	if ((class & TYPE_RESTRICT) && restricted_unop(expr->op, &ctype))
-		return bad_expr_type(expr);
+		unrestrict(expr, class, &ctype);
 
 	if (class & TYPE_NUM) {
 		multiply = 1;
@@ -1735,13 +1739,13 @@ static struct symbol *evaluate_sign(struct expression *expr)
 	/* should be an arithmetic type */
 	if (!(class & TYPE_NUM))
 		return bad_expr_type(expr);
-	if (!(class & (TYPE_FLOAT|TYPE_RESTRICT))) {
-		struct symbol *rtype = integer_promotion(ctype);
-		expr->unop = cast_to(expr->unop, rtype);
-		ctype = rtype;
-	} else if ((class & TYPE_FLOAT) && expr->op != '~') {
-		/* no conversions needed */
-	} else if ((class & TYPE_RESTRICT) && !restricted_unop(expr->op, &ctype)) {
+	if (class & TYPE_RESTRICT)
+		goto Restr;
+Normal:
+	if (!(class & TYPE_FLOAT)) {
+		ctype = integer_promotion(ctype);
+		expr->unop = cast_to(expr->unop, ctype);
+	} else if (expr->op != '~') {
 		/* no conversions needed */
 	} else {
 		return bad_expr_type(expr);
@@ -1750,6 +1754,10 @@ static struct symbol *evaluate_sign(struct expression *expr)
 		*expr = *expr->unop;
 	expr->ctype = ctype;
 	return ctype;
+Restr:
+	if (restricted_unop(expr->op, &ctype))
+		unrestrict(expr, class, &ctype);
+	goto Normal;
 }
 
 static struct symbol *evaluate_preop(struct expression *expr)
diff --git a/expression.c b/expression.c
index 9f45c794..d2437c74 100644
--- a/expression.c
+++ b/expression.c
@@ -26,6 +26,7 @@
 #include "scope.h"
 #include "expression.h"
 #include "target.h"
+#include "char.h"
 
 static int match_oplist(int op, ...)
 {
@@ -64,53 +65,50 @@ struct token *parens_expression(struct token *token, struct expression **expr, c
  * Handle __func__, __FUNCTION__ and __PRETTY_FUNCTION__ token
  * conversion
  */
-static int convert_one_fn_token(struct token *token)
+static struct symbol *handle_func(struct token *token)
 {
-	struct symbol *sym = current_fn;
-
-	if (sym) {
-		struct ident *ident = sym->ident;
-		if (ident) {
-			int len = ident->len;
-			struct string *string;
-
-			string = __alloc_string(len+1);
-			memcpy(string->data, ident->name, len);
-			string->data[len] = 0;
-			string->length = len+1;
-			token_type(token) = TOKEN_STRING;
-			token->string = string;
-			return 1;
-		}
-	}
-	return 0;
-}
-
-static int convert_function(struct token *next)
-{
-	int retval = 0;
-	for (;;) {
-		struct token *token = next;
-		next = next->next;
-		switch (token_type(token)) {
-		case TOKEN_STRING:
-			continue;
-		case TOKEN_IDENT:
-			if (token->ident == &__func___ident ||
-			    token->ident == &__FUNCTION___ident ||
-			    token->ident == &__PRETTY_FUNCTION___ident) {
-				if (!convert_one_fn_token(token))
-					break;
-				retval = 1;
-				continue;
-			}
-		/* Fall through */
-		default:
-			break;
-		}
-		break;
-	}
-	return retval;
+	struct ident *ident = token->ident;
+	struct symbol *decl, *array;
+	struct string *string;
+	int len;
+
+	if (ident != &__func___ident &&
+	    ident != &__FUNCTION___ident &&
+	    ident != &__PRETTY_FUNCTION___ident)
+		return NULL;	/* not one of the function-name identifiers */
+
+	if (!current_fn || !current_fn->ident)	/* no enclosing function, or it has no name to copy below */
+		return NULL;
+
+	/* OK, it's one of ours: build a static char array symbol initialized with the function's name */
+	array = alloc_symbol(token->pos, SYM_ARRAY);
+	array->ctype.base_type = &char_ctype;
+	array->ctype.alignment = 1;
+	array->endpos = token->pos;
+	decl = alloc_symbol(token->pos, SYM_NODE);
+	decl->ctype.base_type = array;
+	decl->ctype.alignment = 1;
+	decl->ctype.modifiers = MOD_STATIC;
+	decl->endpos = token->pos;
+
+	/* function-scope, but in NS_SYMBOL */
+	bind_symbol(decl, ident, NS_LABEL);
+	decl->namespace = NS_SYMBOL;
+
+	len = current_fn->ident->len;
+	string = __alloc_string(len + 1);	/* room for the name plus its terminating NUL */
+	memcpy(string->data, current_fn->ident->name, len);
+	string->data[len] = 0;
+	string->length = len + 1;
+
+	decl->initializer = alloc_expression(token->pos, EXPR_STRING);
+	decl->initializer->string = string;
+	decl->initializer->ctype = decl;
+	decl->array_size = alloc_const_expression(token->pos, len + 1);
+	array->array_size = decl->array_size;
+	decl->bit_size = array->bit_size = bytes_to_bits(len + 1);
+
+	return decl;
 }
 
 static struct token *parse_type(struct token *token, struct expression **tree)
@@ -220,50 +218,6 @@ static struct token *builtin_offsetof_expr(struct token *token,
 	}
 }
 
-static struct token *string_expression(struct token *token, struct expression *expr)
-{
-	struct string *string = token->string;
-	struct token *next = token->next;
-	int stringtype = token_type(token);
-
-	convert_function(token);
-
-	if (token_type(next) == stringtype) {
-		int totlen = string->length-1;
-		char *data;
-
-		do {
-			totlen += next->string->length-1;
-			next = next->next;
-		} while (token_type(next) == stringtype);
-
-		if (totlen > MAX_STRING) {
-			warning(token->pos, "trying to concatenate %d-character string (%d bytes max)", totlen, MAX_STRING);
-			totlen = MAX_STRING;
-		}
-
-		string = __alloc_string(totlen+1);
-		string->length = totlen+1;
-		data = string->data;
-		next = token;
-		do {
-			struct string *s = next->string;
-			int len = s->length-1;
-
-			if (len > totlen)
-				len = totlen;
-			totlen -= len;
-
-			next = next->next;
-			memcpy(data, s->data, len);
-			data += len;
-		} while (token_type(next) == stringtype);
-		*data = '\0';
-	}
-	expr->string = string;
-	return next;
-}
-
 #ifndef ULLONG_MAX
 #define ULLONG_MAX (~0ULL)
 #endif
@@ -404,12 +358,11 @@ struct token *primary_expression(struct token *token, struct expression **tree)
 	struct expression *expr = NULL;
 
 	switch (token_type(token)) {
-	case TOKEN_CHAR:
-	case TOKEN_WIDE_CHAR:
+	case TOKEN_CHAR ... TOKEN_WIDE_CHAR + 4:
 		expr = alloc_expression(token->pos, EXPR_VALUE);   
 		expr->flags = Int_const_expr;
-		expr->ctype = token_type(token) == TOKEN_CHAR ? &int_ctype : &long_ctype;
-		expr->value = (unsigned char) token->character;
+		expr->ctype = token_type(token) < TOKEN_WIDE_CHAR ? &int_ctype : &long_ctype;
+		get_char_constant(token, &expr->value);
 		token = token->next;
 		break;
 
@@ -434,8 +387,7 @@ struct token *primary_expression(struct token *token, struct expression **tree)
 		struct token *next = token->next;
 
 		if (!sym) {
-			if (convert_function(token))
-				goto handle_string;
+			sym = handle_func(token);
 			if (token->ident == &__builtin_types_compatible_p_ident) {
 				token = builtin_types_compatible_p_expr(token, &expr);
 				break;
@@ -473,13 +425,10 @@ struct token *primary_expression(struct token *token, struct expression **tree)
 	}
 
 	case TOKEN_STRING:
-	case TOKEN_WIDE_STRING: {
-	handle_string:
+	case TOKEN_WIDE_STRING:
 		expr = alloc_expression(token->pos, EXPR_STRING);
-		expr->wide = token_type(token) == TOKEN_WIDE_STRING;
-		token = string_expression(token, expr);
+		token = get_string_constant(token, expr);
 		break;
-	}
 
 	case TOKEN_SPECIAL:
 		if (token->special == '(') {
diff --git a/lib.c b/lib.c
index bb814f2e..6bd10d36 100644
--- a/lib.c
+++ b/lib.c
@@ -234,8 +234,8 @@ int arch_m64 = ARCH_M64_DEFAULT;
 int arch_msize_long = 0;
 
 #define CMDLINE_INCLUDE 20
-int cmdline_include_nr = 0;
-struct cmdline_include cmdline_include[CMDLINE_INCLUDE];
+static int cmdline_include_nr = 0;
+static char *cmdline_include[CMDLINE_INCLUDE];
 
 
 void add_pre_buffer(const char *fmt, ...)
@@ -308,16 +308,9 @@ static char **handle_switch_I(char *arg, char **next)
 
 static void add_cmdline_include(char *filename)
 {
-	int fd = open(filename, O_RDONLY);
-	if (fd < 0) {
-		perror(filename);
-		return;
-	}
 	if (cmdline_include_nr >= CMDLINE_INCLUDE)
 		die("too many include files for %s\n", filename);
-	cmdline_include[cmdline_include_nr].filename = filename;
-	cmdline_include[cmdline_include_nr].fd = fd;
-	cmdline_include_nr++;
+	cmdline_include[cmdline_include_nr++] = filename;
 }
 
 static char **handle_switch_i(char *arg, char **next)
@@ -930,19 +923,13 @@ static struct symbol_list *sparse_file(const char *filename)
  */
 static struct symbol_list *sparse_initial(void)
 {
-	struct token *token;
 	int i;
 
 	// Prepend any "include" file to the stream.
 	// We're in global scope, it will affect all files!
-	token = NULL;
-	for (i = cmdline_include_nr - 1; i >= 0; i--)
-		token = tokenize(cmdline_include[i].filename, cmdline_include[i].fd,
-				 token, includepath);
-
-	// Prepend the initial built-in stream
-	if (token)
-		pre_buffer_end->next = token;
+	for (i = 0; i < cmdline_include_nr; i++)
+		add_pre_buffer("#argv_include \"%s\"\n", cmdline_include[i]);
+
 	return sparse_tokenstream(pre_buffer_begin);
 }
 
diff --git a/lib.h b/lib.h
index 2cea2520..ee954fed 100644
--- a/lib.h
+++ b/lib.h
@@ -41,15 +41,6 @@ struct position {
 		     noexpand:1;
 };
 
-struct cmdline_include {
-	char *filename;
-	int fd;
-};
-
-extern struct cmdline_include cmdline_include[];
-extern int cmdline_include_nr;
-
-
 struct ident;
 struct token;
 struct symbol;
diff --git a/pre-process.c b/pre-process.c
index 8a16f8b3..e5f56b40 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -82,8 +82,6 @@ static struct token *alloc_token(struct position *pos)
 	return token;
 }
 
-static const char *show_token_sequence(struct token *token);
-
 /* Expand symbol 'sym' at '*list' */
 static int expand(struct token **, struct symbol *);
 
@@ -340,9 +338,35 @@ static struct token *dup_list(struct token *list)
 	return res;
 }
 
+static const char *quote_token_sequence(struct token *token)
+{
+	/* Join quote_token() of every token up to EOF; a space precedes each later token whose whitespace flag is set. */
+	static char buffer[1024];
+	char *const limit = buffer + sizeof(buffer);
+	char *out = buffer;
+	int sep = 0;
+
+	for (; !eof_token(token); sep = token->pos.whitespace) {
+		const char *text = quote_token(token);
+		int n = strlen(text);
+
+		if (out + sep + n >= limit) {
+			sparse_error(token->pos, "too long token expansion");
+			break;
+		}
+		if (sep)
+			*out++ = ' ';
+		memcpy(out, text, n);
+		out += n;
+		token = token->next;
+	}
+	*out = 0;
+	return buffer;
+}
+
 static struct token *stringify(struct token *arg)
 {
-	const char *s = show_token_sequence(arg);
+	const char *s = quote_token_sequence(arg);
 	int size = strlen(s)+1;
 	struct token *token = __alloc_token(0);
 	struct string *string = __alloc_string(size);
@@ -383,6 +407,8 @@ static void expand_arguments(int count, struct arg *args)
  * Possibly valid combinations:
  *  - ident + ident -> ident
  *  - ident + number -> ident unless number contains '.', '+' or '-'.
+ *  - 'L' + char constant -> wide char constant
+ *  - 'L' + string literal -> wide string literal
  *  - number + number -> number
  *  - number + ident -> number
  *  - number + '.' -> number
@@ -398,6 +424,13 @@ static enum token_type combine(struct token *left, struct token *right, char *p)
 	if (t1 != TOKEN_IDENT && t1 != TOKEN_NUMBER && t1 != TOKEN_SPECIAL)
 		return TOKEN_ERROR;
 
+	if (t1 == TOKEN_IDENT && left->ident == &L_ident) {
+		if (t2 >= TOKEN_CHAR && t2 < TOKEN_WIDE_CHAR)
+			return t2 + TOKEN_WIDE_CHAR - TOKEN_CHAR;
+		if (t2 == TOKEN_STRING)
+			return TOKEN_WIDE_STRING;
+	}
+
 	if (t2 != TOKEN_IDENT && t2 != TOKEN_NUMBER && t2 != TOKEN_SPECIAL)
 		return TOKEN_ERROR;
 
@@ -440,9 +473,10 @@ static enum token_type combine(struct token *left, struct token *right, char *p)
 static int merge(struct token *left, struct token *right)
 {
 	static char buffer[512];
+	enum token_type res = combine(left, right, buffer);
 	int n;
 
-	switch (combine(left, right, buffer)) {
+	switch (res) {
 	case TOKEN_IDENT:
 		left->ident = built_in_ident(buffer);
 		left->pos.noexpand = 0;
@@ -465,6 +499,21 @@ static int merge(struct token *left, struct token *right)
 				return 1;
 			}
 		}
+		break;
+
+	case TOKEN_WIDE_CHAR:
+	case TOKEN_WIDE_STRING:
+		token_type(left) = res;
+		left->pos.noexpand = 0;
+		left->string = right->string;
+		return 1;
+
+	case TOKEN_WIDE_CHAR + 1 ... TOKEN_WIDE_CHAR + 4:
+		token_type(left) = res;
+		left->pos.noexpand = 0;
+		memcpy(left->embedded, right->embedded, 4);
+		return 1;
+
 	default:
 		;
 	}
@@ -472,12 +521,12 @@ static int merge(struct token *left, struct token *right)
 	return 0;
 }
 
-static struct token *dup_token(struct token *token, struct position *streampos, struct position *pos)
+static struct token *dup_token(struct token *token, struct position *streampos)
 {
 	struct token *alloc = alloc_token(streampos);
 	token_type(alloc) = token_type(token);
-	alloc->pos.newline = pos->newline;
-	alloc->pos.whitespace = pos->whitespace;
+	alloc->pos.newline = token->pos.newline;
+	alloc->pos.whitespace = token->pos.whitespace;
 	alloc->number = token->number;
 	alloc->pos.noexpand = token->pos.noexpand;
 	return alloc;	
@@ -489,7 +538,7 @@ static struct token **copy(struct token **where, struct token *list, int *count)
 	while (!eof_token(list)) {
 		struct token *token;
 		if (need_copy)
-			token = dup_token(list, &list->pos, &list->pos);
+			token = dup_token(list, &list->pos);
 		else
 			token = list;
 		if (token_type(token) == TOKEN_IDENT && token->ident->tainted)
@@ -502,17 +551,37 @@ static struct token **copy(struct token **where, struct token *list, int *count)
 	return where;
 }
 
+static int handle_kludge(struct token **p, struct arg *args)	/* 1: caller drops the ',' and *p is moved to the chain's last argument; 0: the ',' is kept */
+{
+	struct token *t = (*p)->next->next;	/* presumably the first argument after ",##" (see the GNU kludge comment in substitute) */
+	while (1) {
+		struct arg *v = &args[t->argnum];
+		if (token_type(t->next) != TOKEN_CONCAT) {	/* no further ## follows: t ends the chain */
+			if (v->arg) {
+				/* ignore the first ## */
+				*p = (*p)->next;
+				return 0;
+			}
+			/* skip the entire thing */
+			*p = t;
+			return 1;
+		}
+		if (v->arg && !eof_token(v->arg))
+			return 0; /* no magic */
+		t = t->next->next;	/* step over the following ## to the next operand */
+	}
+}
+
 static struct token **substitute(struct token **list, struct token *body, struct arg *args)
 {
-	struct token *token = *list;
-	struct position *base_pos = &token->pos;
-	struct position *pos = base_pos;
+	struct position *base_pos = &(*list)->pos;
 	int *count;
 	enum {Normal, Placeholder, Concat} state = Normal;
 
-	for (; !eof_token(body); body = body->next, pos = &body->pos) {
+	for (; !eof_token(body); body = body->next) {
 		struct token *added, *arg;
 		struct token **tail;
+		struct token *t;
 
 		switch (token_type(body)) {
 		case TOKEN_GNU_KLUDGE:
@@ -520,13 +589,20 @@ static struct token **substitute(struct token **list, struct token *body, struct
 			 * GNU kludge: if we had <comma>##<vararg>, behaviour
 			 * depends on whether we had enough arguments to have
 			 * a vararg.  If we did, ## is just ignored.  Otherwise
-			 * both , and ## are ignored.  Comma should come from
-			 * the body of macro and not be an argument of earlier
-			 * concatenation.
+			 * both , and ## are ignored.  Worse, there can be
+			 * an arbitrary number of ##<arg> in between; if all of
+			 * those are empty, we act as if they hadn't been there,
+			 * otherwise we act as if the kludge didn't exist.
 			 */
-			if (!args[body->next->argnum].arg)
+			t = body;
+			if (handle_kludge(&body, args)) {
+				if (state == Concat)
+					state = Normal;
+				else
+					state = Placeholder;
 				continue;
-			added = dup_token(body, base_pos, pos);
+			}
+			added = dup_token(t, base_pos);
 			token_type(added) = TOKEN_SPECIAL;
 			tail = &added->next;
 			break;
@@ -557,8 +633,8 @@ static struct token **substitute(struct token **list, struct token *body, struct
 			}
 		copy_arg:
 			tail = copy(&added, arg, count);
-			added->pos.newline = pos->newline;
-			added->pos.whitespace = pos->whitespace;
+			added->pos.newline = body->pos.newline;
+			added->pos.whitespace = body->pos.whitespace;
 			break;
 
 		case TOKEN_CONCAT:
@@ -569,14 +645,14 @@ static struct token **substitute(struct token **list, struct token *body, struct
 			continue;
 
 		case TOKEN_IDENT:
-			added = dup_token(body, base_pos, pos);
+			added = dup_token(body, base_pos);
 			if (added->ident->tainted)
 				added->pos.noexpand = 1;
 			tail = &added->next;
 			break;
 
 		default:
-			added = dup_token(body, base_pos, pos);
+			added = dup_token(body, base_pos);
 			tail = &added->next;
 			break;
 		}
@@ -625,6 +701,14 @@ static int expand(struct token **list, struct symbol *sym)
 
 	last = token->next;
 	tail = substitute(list, sym->expansion, args);
+	/*
+	 * Note that it won't be eof - at least TOKEN_UNTAINT will be there.
+	 * We still can lose the newline flag if the sucker expands to nothing,
+	 * but the price of dealing with that is probably too high (we'd need
+	 * to collect the flags during scan_next())
+	 */
+	(*list)->pos.newline = token->pos.newline;
+	(*list)->pos.whitespace = token->pos.whitespace;
 	*tail = last;
 
 	return 0;
@@ -767,31 +851,6 @@ static int do_include_path(const char **pptr, struct token **list, struct token
 	return 0;
 }
 
-static void do_include(int local, struct stream *stream, struct token **list, struct token *token, const char *filename, const char **path)
-{
-	int flen = strlen(filename) + 1;
-
-	/* Absolute path? */
-	if (filename[0] == '/') {
-		if (try_include("", filename, flen, list, includepath))
-			return;
-		goto out;
-	}
-
-	/* Dir of input file is first dir to search for quoted includes */
-	set_stream_include_path(stream);
-
-	if (!path)
-		/* Do not search quote include if <> is in use */
-		path = local ? quote_includepath : angle_includepath;
-
-	/* Check the standard include paths.. */
-	if (do_include_path(path, list, token, filename, flen))
-		return;
-out:
-	error_die(token->pos, "unable to open '%s'", filename);
-}
-
 static int free_preprocessor_line(struct token *token)
 {
 	while (token_type(token) != TOKEN_EOF) {
@@ -802,11 +861,13 @@ static int free_preprocessor_line(struct token *token)
 	return 1;
 }
 
-static int handle_include_path(struct stream *stream, struct token **list, struct token *token, const char **path)
+static int handle_include_path(struct stream *stream, struct token **list, struct token *token, int how)
 {
 	const char *filename;
 	struct token *next;
+	const char **path;
 	int expect;
+	int flen;
 
 	next = token->next;
 	expect = '>';
@@ -819,20 +880,52 @@ static int handle_include_path(struct stream *stream, struct token **list, struc
 			expect = '>';
 		}
 	}
+
 	token = next->next;
 	filename = token_name_sequence(token, expect, token);
-	do_include(!expect, stream, list, token, filename, path);
-	return 0;
+	flen = strlen(filename) + 1;
+
+	/* Absolute path? */
+	if (filename[0] == '/') {
+		if (try_include("", filename, flen, list, includepath))
+			return 0;
+		goto out;
+	}
+
+	switch (how) {
+	case 1:
+		path = stream->next_path;
+		break;
+	case 2:
+		includepath[0] = "";
+		path = includepath;
+		break;
+	default:
+		/* Dir of input file is first dir to search for quoted includes */
+		set_stream_include_path(stream);
+		path = expect ? angle_includepath : quote_includepath;
+		break;
+	}
+	/* Check the standard include paths.. */
+	if (do_include_path(path, list, token, filename, flen))
+		return 0;
+out:
+	error_die(token->pos, "unable to open '%s'", filename);
 }
 
 static int handle_include(struct stream *stream, struct token **list, struct token *token)
 {
-	return handle_include_path(stream, list, token, NULL);
+	return handle_include_path(stream, list, token, 0);
 }
 
 static int handle_include_next(struct stream *stream, struct token **list, struct token *token)
 {
-	return handle_include_path(stream, list, token, stream->next_path);
+	return handle_include_path(stream, list, token, 1);
+}
+
+static int handle_argv_include(struct stream *stream, struct token **list, struct token *token)
+{
+	return handle_include_path(stream, list, token, 2);	/* how == 2: search includepath with its first entry reset to "" */
+}
 
 static int token_different(struct token *t1, struct token *t2)
@@ -863,10 +956,12 @@ static int token_different(struct token *t1, struct token *t2)
 	case TOKEN_STR_ARGUMENT:
 		different = t1->argnum != t2->argnum;
 		break;
+	case TOKEN_CHAR + 1 ... TOKEN_CHAR + 4:
+	case TOKEN_WIDE_CHAR + 1 ... TOKEN_WIDE_CHAR + 4:
+		different = memcmp(t1->embedded, t2->embedded, 4);
+		break;
 	case TOKEN_CHAR:
 	case TOKEN_WIDE_CHAR:
-		different = t1->character != t2->character;
-		break;
 	case TOKEN_STRING:
 	case TOKEN_WIDE_STRING: {
 		struct string *s1, *s2;
@@ -1035,6 +1130,10 @@ static int try_arg(struct token *token, enum token_type type, struct token *argl
 			}
 			if (n)
 				return count->vararg ? 2 : 1;
+			/*
+			 * XXX - need saner handling of that
+			 * (>= 1024 instances of argument)
+			 */
 			token_type(token) = TOKEN_ERROR;
 			return -1;
 		}
@@ -1042,49 +1141,103 @@ static int try_arg(struct token *token, enum token_type type, struct token *argl
 	return 0;
 }
 
+static struct token *handle_hash(struct token **p, struct token *arglist)
+{
+	struct token *hash = *p, *param;
+
+	if (!arglist) {
+		/* no parameter list: '#' stays in the body as a token flagged noexpand */
+		hash->pos.noexpand = 1;
+		return hash;
+	}
+	param = hash->next;
+	if (!try_arg(param, TOKEN_STR_ARGUMENT, arglist)) {
+		sparse_error(hash->pos, "'#' is not followed by a macro parameter");
+		return NULL;
+	}
+	param->pos.whitespace = hash->pos.whitespace;
+	__free_token(hash);
+	*p = param;
+	return param;
+}
+
+/* token->next is ##; walks the whole "a ## b ## ..." chain and returns its last operand, or NULL after reporting an error */
+static struct token *handle_hashhash(struct token *token, struct token *arglist)
+{
+	struct token *last = token;	/* most recent operand that restarted 'state'; becomes the kludge ',' when state ends at 2 */
+	struct token *concat;
+	int state = match_op(token, ',');	/* 1: 'last' is a ',' followed so far only by macro arguments; 2: try_arg reported a vararg */
+	
+	try_arg(token, TOKEN_QUOTED_ARGUMENT, arglist);
+
+	while (1) {
+		struct token *t;
+		int is_arg;
+
+		/* eat duplicate ## */
+		concat = token->next;
+		while (match_op(t = concat->next, SPECIAL_HASHHASH)) {
+			token->next = t;	/* unlink the redundant ## before freeing it */
+			__free_token(concat);
+			concat = t;
+		}
+		token_type(concat) = TOKEN_CONCAT;
+
+		if (eof_token(t))	/* ## with nothing after it */
+			goto Econcat;
+
+		if (match_op(t, '#')) {
+			t = handle_hash(&concat->next, arglist);	/* may replace '#' by the parameter that follows it */
+			if (!t)
+				return NULL;
+		}
+
+		is_arg = try_arg(t, TOKEN_QUOTED_ARGUMENT, arglist);
+
+		if (state == 1 && is_arg) {
+			state = is_arg;	/* keeps 'last' pointing at the ',' */
+		} else {
+			last = t;
+			state = match_op(t, ',');
+		}
+
+		token = t;
+		if (!match_op(token->next, SPECIAL_HASHHASH))	/* chain ends here */
+			break;
+	}
+	/* handle GNU ,##__VA_ARGS__ kludge, in all its weirdness */
+	if (state == 2)
+		token_type(last) = TOKEN_GNU_KLUDGE;
+	return token;
+
+Econcat:
+	sparse_error(concat->pos, "'##' cannot appear at the ends of macro expansion");
+	return NULL;
+}
+
 static struct token *parse_expansion(struct token *expansion, struct token *arglist, struct ident *name)
 {
 	struct token *token = expansion;
 	struct token **p;
-	struct token *last = NULL;
 
 	if (match_op(token, SPECIAL_HASHHASH))
 		goto Econcat;
 
 	for (p = &expansion; !eof_token(token); p = &token->next, token = *p) {
 		if (match_op(token, '#')) {
-			if (arglist) {
-				struct token *next = token->next;
-				if (!try_arg(next, TOKEN_STR_ARGUMENT, arglist))
-					goto Equote;
-				next->pos.whitespace = token->pos.whitespace;
-				token = *p = next;
-			} else {
-				token->pos.noexpand = 1;
-			}
-		} else if (match_op(token, SPECIAL_HASHHASH)) {
-			struct token *next = token->next;
-			int arg = try_arg(next, TOKEN_QUOTED_ARGUMENT, arglist);
-			token_type(token) = TOKEN_CONCAT;
-			if (arg) {
-				token = next;
-				/* GNU kludge */
-				if (arg == 2 && last && match_op(last, ',')) {
-					token_type(last) = TOKEN_GNU_KLUDGE;
-					last->next = token;
-				}
-			} else if (match_op(next, SPECIAL_HASHHASH))
-				token = next;
-			else if (eof_token(next))
-				goto Econcat;
-		} else if (match_op(token->next, SPECIAL_HASHHASH)) {
-			try_arg(token, TOKEN_QUOTED_ARGUMENT, arglist);
+			token = handle_hash(p, arglist);
+			if (!token)
+				return NULL;
+		}
+		if (match_op(token->next, SPECIAL_HASHHASH)) {
+			token = handle_hashhash(token, arglist);
+			if (!token)
+				return NULL;
 		} else {
 			try_arg(token, TOKEN_MACRO_ARGUMENT, arglist);
 		}
 		if (token_type(token) == TOKEN_ERROR)
 			goto Earg;
-		last = token;
 	}
 	token = alloc_token(&expansion->pos);
 	token_type(token) = TOKEN_UNTAINT;
@@ -1093,10 +1246,6 @@ static struct token *parse_expansion(struct token *expansion, struct token *argl
 	*p = token;
 	return expansion;
 
-Equote:
-	sparse_error(token->pos, "'#' is not followed by a macro parameter");
-	return NULL;
-
 Econcat:
 	sparse_error(token->pos, "'##' cannot appear at the ends of macro expansion");
 	return NULL;
@@ -1287,6 +1436,8 @@ static int handle_ifndef(struct stream *stream, struct token **line, struct toke
 	return preprocessor_if(stream, token, arg);
 }
 
+static const char *show_token_sequence(struct token *token);
+
 /*
  * Expression handling for #if and #elif; it differs from normal expansion
  * due to special treatment of "defined".
@@ -1709,6 +1860,7 @@ static void init_preprocessor(void)
 		{ "add_system",    handle_add_system },
 		{ "add_dirafter",  handle_add_dirafter },
 		{ "split_include", handle_split_include },
+		{ "argv_include",  handle_argv_include },
 	}, special[] = {
 		{ "ifdef",	handle_ifdef },
 		{ "ifndef",	handle_ifndef },
diff --git a/token.h b/token.h
index cd292331..20c23268 100644
--- a/token.h
+++ b/token.h
@@ -68,8 +68,8 @@ enum token_type {
 	TOKEN_ZERO_IDENT,
 	TOKEN_NUMBER,
 	TOKEN_CHAR,
-	TOKEN_WIDE_CHAR,
-	TOKEN_STRING,
+	TOKEN_WIDE_CHAR = TOKEN_CHAR + 5,
+	TOKEN_STRING = TOKEN_WIDE_CHAR + 5,
 	TOKEN_WIDE_STRING,
 	TOKEN_SPECIAL,
 	TOKEN_STREAMBEGIN,
@@ -165,9 +165,9 @@ struct token {
 		struct ident *ident;
 		unsigned int special;
 		struct string *string;
-		int character;
 		int argnum;
 		struct argcount count;
+		char embedded[4];
 	};
 };
 
@@ -198,6 +198,7 @@ extern const char *show_special(int);
 extern const char *show_ident(const struct ident *);
 extern const char *show_string(const struct string *string);
 extern const char *show_token(const struct token *);
+extern const char *quote_token(const struct token *);
 extern struct token * tokenize(const char *, int, struct token *, const char **next_path);
 extern struct token * tokenize_buffer(void *, unsigned long, struct token **);
 
diff --git a/tokenize.c b/tokenize.c
index d4f05e56..95f308e0 100644
--- a/tokenize.c
+++ b/tokenize.c
@@ -121,6 +121,42 @@ const char *show_string(const struct string *string)
 	return buffer;
 }
 
+static const char *show_char(const char *s, size_t len, char prefix, char delim)
+{
+	/* [prefix] delim, len raw bytes of s, delim, NUL; the buffer is sized for len <= MAX_STRING (not checked here) */
+	static char buffer[MAX_STRING + 4];
+	size_t pos = 0;
+	if (prefix)
+		buffer[pos++] = prefix;
+	buffer[pos++] = delim;
+	memcpy(buffer + pos, s, len);
+	buffer[pos + len] = delim;
+	buffer[pos + len + 1] = '\0';
+	return buffer;
+}
+
+static const char *quote_char(const char *s, size_t len, char prefix, char delim)
+{
+	static char buffer[2*MAX_STRING + 6];	/* fits len <= MAX_STRING with every byte escaped, plus prefix, two \-escaped delimiters and NUL */
+	const char *const stop = s + len;
+	char *out = buffer;
+	if (prefix)
+		*out++ = prefix;
+	if (delim == '"')	/* a double-quote delimiter is itself emitted backslash-escaped */
+		*out++ = '\\';
+	*out++ = delim;
+	for (; s != stop; s++) {
+		if (*s == '"' || *s == '\\')
+			*out++ = '\\';
+		*out++ = *s;
+	}
+	if (delim == '"')
+		*out++ = '\\';
+	*out++ = delim;
+	*out++ = '\0';
+	return buffer;
+}
+
 const char *show_token(const struct token *token)
 {
 	static char buffer[256];
@@ -137,10 +173,6 @@ const char *show_token(const struct token *token)
 	case TOKEN_IDENT:
 		return show_ident(token->ident);
 
-	case TOKEN_STRING:
-	case TOKEN_WIDE_STRING:
-		return show_string(token->string);
-
 	case TOKEN_NUMBER:
 		return token->number;
 
@@ -148,15 +180,23 @@ const char *show_token(const struct token *token)
 		return show_special(token->special);
 
 	case TOKEN_CHAR: 
-	case TOKEN_WIDE_CHAR: {
-		char *ptr = buffer;
-		int c = token->character;
-		*ptr++ = '\'';
-		ptr = charstr(ptr, c, '\'', 0);
-		*ptr++ = '\'';
-		*ptr++ = '\0';
-		return buffer;
-	}
+		return show_char(token->string->data,
+			token->string->length - 1, 0, '\'');
+	case TOKEN_CHAR+1 ... TOKEN_CHAR+4:
+		return show_char(token->embedded,
+			token_type(token) - TOKEN_CHAR, 0, '\'');
+	case TOKEN_WIDE_CHAR: 
+		return show_char(token->string->data,
+			token->string->length - 1, 'L', '\'');
+	case TOKEN_WIDE_CHAR+1 ... TOKEN_WIDE_CHAR+4:
+		return show_char(token->embedded,
+			token_type(token) - TOKEN_WIDE_CHAR, 'L', '\'');
+	case TOKEN_STRING: 
+		return show_char(token->string->data,
+			token->string->length - 1, 0, '"');
+	case TOKEN_WIDE_STRING: 
+		return show_char(token->string->data,
+			token->string->length - 1, 'L', '"');
 
 	case TOKEN_STREAMBEGIN:
 		sprintf(buffer, "<beginning of '%s'>", stream_name(token->pos.stream));
@@ -180,6 +220,47 @@ const char *show_token(const struct token *token)
 	}
 }
 
+/* Printable spelling of a token, with literals escaped by quote_char(); result may be static. */
+const char *quote_token(const struct token *token)
+{
+	static char buffer[256];
+	const int type = token_type(token);
+	char prefix = 0, delim = '\'';
+	switch (type) {
+	case TOKEN_ERROR:
+		return "syntax error";
+	case TOKEN_IDENT:
+		return show_ident(token->ident);
+	case TOKEN_NUMBER:
+		return token->number;
+	case TOKEN_SPECIAL:
+		return show_special(token->special);
+
+	/* short character constants: bytes embedded, count encoded in the type */
+	case TOKEN_CHAR+1 ... TOKEN_CHAR+4:
+		return quote_char(token->embedded, type - TOKEN_CHAR, 0, '\'');
+	case TOKEN_WIDE_CHAR+1 ... TOKEN_WIDE_CHAR+4:
+		return quote_char(token->embedded, type - TOKEN_WIDE_CHAR, 'L', '\'');
+
+	/* the rest keep their spelling in token->string (length counts the NUL) */
+	case TOKEN_WIDE_STRING:
+		prefix = 'L';
+		/* fallthrough */
+	case TOKEN_STRING:
+		delim = '"';
+		break;
+	case TOKEN_WIDE_CHAR:
+		prefix = 'L';
+		/* fallthrough */
+	case TOKEN_CHAR:
+		break;
+	default:
+		sprintf(buffer, "unhandled token type '%d' ", type);
+		return buffer;
+	}
+	return quote_char(token->string->data, token->string->length - 1, prefix, delim);
+}
+
 #define HASHED_INPUT_BITS (6)
 #define HASHED_INPUT (1 << HASHED_INPUT_BITS)
 #define HASH_PRIME 0x9e370001UL
@@ -241,10 +322,10 @@ static int nextchar_slow(stream_t *stream)
 	int offset = stream->offset;
 	int size = stream->size;
 	int c;
-	int spliced = 0, had_cr, had_backslash, complain;
+	int spliced = 0, had_cr, had_backslash;
 
 restart:
-	had_cr = had_backslash = complain = 0;
+	had_cr = had_backslash = 0;
 
 repeat:
 	if (offset >= size) {
@@ -258,48 +339,53 @@ repeat:
 	}
 
 	c = stream->buffer[offset++];
-
-	if (had_cr && c != '\n')
-		complain = 1;
+	if (had_cr)
+		goto check_lf;
 
 	if (c == '\r') {
 		had_cr = 1;
 		goto repeat;
 	}
 
-	stream->pos += (c == '\t') ? (tabstop - stream->pos % tabstop) : 1;
-
-	if (c == '\n') {
-		stream->line++;
-		stream->pos = 0;
-	}
-
+norm:
 	if (!had_backslash) {
-		if (c == '\\') {
+		switch (c) {
+		case '\t':
+			stream->pos += tabstop - stream->pos % tabstop;
+			break;
+		case '\n':
+			stream->line++;
+			stream->pos = 0;
+			stream->newline = 1;
+			break;
+		case '\\':
 			had_backslash = 1;
+			stream->pos++;
 			goto repeat;
+		default:
+			stream->pos++;
 		}
-		if (c == '\n')
-			stream->newline = 1;
 	} else {
 		if (c == '\n') {
-			if (complain)
-				warning(stream_pos(stream), "non-ASCII data stream");
+			stream->line++;
+			stream->pos = 0;
 			spliced = 1;
 			goto restart;
 		}
-		stream->pos--;
 		offset--;
 		c = '\\';
 	}
-
 out:
 	stream->offset = offset;
-	if (complain)
-		warning(stream_pos(stream), "non-ASCII data stream");
 
 	return c;
 
+check_lf:
+	if (c != '\n')
+		offset--;
+	c = '\n';
+	goto norm;
+
 got_eof:
 	if (had_backslash) {
 		c = '\\';
@@ -307,8 +393,6 @@ got_eof:
 	}
 	if (stream->pos)
 		warning(stream_pos(stream), "no newline at end of file");
-	else if (had_cr)
-		warning(stream_pos(stream), "non-ASCII data stream");
 	else if (spliced)
 		warning(stream_pos(stream), "backslash-newline at end of file");
 	return EOF;
@@ -380,22 +464,36 @@ enum {
 	Exp = 8,
 	Dot = 16,
 	ValidSecond = 32,
+	Quote = 64,
+	Escape = 128,
 };
 
 static const long cclass[257] = {
-	['0' + 1 ... '9' + 1] = Digit | Hex,
+	['0' + 1 ... '7' + 1] = Digit | Hex | Escape,	/* \<octal> */
+	['8' + 1 ... '9' + 1] = Digit | Hex,
 	['A' + 1 ... 'D' + 1] = Letter | Hex,
-	['E' + 1] = Letter | Hex | Exp,
+	['E' + 1] = Letter | Hex | Exp,	/* E<exp> */
 	['F' + 1] = Letter | Hex,
 	['G' + 1 ... 'O' + 1] = Letter,
-	['P' + 1] = Letter | Exp,
+	['P' + 1] = Letter | Exp,	/* P<exp> */
 	['Q' + 1 ... 'Z' + 1] = Letter,
-	['a' + 1 ... 'd' + 1] = Letter | Hex,
-	['e' + 1] = Letter | Hex | Exp,
-	['f' + 1] = Letter | Hex,
-	['g' + 1 ... 'o' + 1] = Letter,
-	['p' + 1] = Letter | Exp,
-	['q' + 1 ... 'z' + 1] = Letter,
+	['a' + 1 ... 'b' + 1] = Letter | Hex | Escape, /* \a, \b */
+	['c' + 1 ... 'd' + 1] = Letter | Hex,
+	['e' + 1] = Letter | Hex | Exp | Escape,/* \e, e<exp> */
+	['f' + 1] = Letter | Hex | Escape,	/* \f */
+	['g' + 1 ... 'm' + 1] = Letter,
+	['n' + 1] = Letter | Escape,	/* \n */
+	['o' + 1] = Letter,
+	['p' + 1] = Letter | Exp,	/* p<exp> */
+	['q' + 1] = Letter,
+	['r' + 1] = Letter | Escape,	/* \r */
+	['s' + 1] = Letter,
+	['t' + 1] = Letter | Escape,	/* \t */
+	['u' + 1] = Letter,
+	['v' + 1] = Letter | Escape,	/* \v */
+	['w' + 1] = Letter,
+	['x' + 1] = Letter | Escape,	/* \x<hex> */
+	['y' + 1 ... 'z' + 1] = Letter,
 	['_' + 1] = Letter,
 	['.' + 1] = Dot | ValidSecond,
 	['=' + 1] = ValidSecond,
@@ -406,6 +504,10 @@ static const long cclass[257] = {
 	['&' + 1] = ValidSecond,
 	['|' + 1] = ValidSecond,
 	['#' + 1] = ValidSecond,
+	['\'' + 1] = Quote | Escape,
+	['"' + 1] = Quote | Escape,
+	['\\' + 1] = Escape,
+	['?' + 1] = Escape,
 };
 
 /*
@@ -465,151 +567,74 @@ static int get_one_number(int c, int next, stream_t *stream)
 	return next;
 }
 
-static int escapechar(int first, int type, stream_t *stream, int *valp)
-{
-	int next, value;
-
-	next = nextchar(stream);
-	value = first;
-
-	if (first == '\n')
-		warning(stream_pos(stream), "Newline in string or character constant");
-
-	if (first == '\\' && next != EOF) {
-		value = next;
-		next = nextchar(stream);
-		if (value != type) {
-			switch (value) {
-			case 'a':
-				value = '\a';
-				break;
-			case 'b':
-				value = '\b';
-				break;
-			case 't':
-				value = '\t';
-				break;
-			case 'n':
-				value = '\n';
-				break;
-			case 'v':
-				value = '\v';
-				break;
-			case 'f':
-				value = '\f';
-				break;
-			case 'r':
-				value = '\r';
-				break;
-			case 'e':
-				value = '\e';
-				break;
-			case '\\':
-				break;
-			case '?':
-				break;
-			case '\'':
-				break;
-			case '"':
-				break;
-			case '\n':
-				warning(stream_pos(stream), "Newline in string or character constant");
-				break;
-			case '0'...'7': {
-				int nr = 2;
-				value -= '0';
-				while (next >= '0' && next <= '7') {
-					value = (value << 3) + (next-'0');
-					next = nextchar(stream);
-					if (!--nr)
-						break;
-				}
-				value &= 0xff;
-				break;
-			}
-			case 'x': {
-				int hex = hexval(next);
-				if (hex < 16) {
-					value = hex;
-					next = nextchar(stream);
-					while ((hex = hexval(next)) < 16) {
-						value = (value << 4) + hex;
-						next = nextchar(stream);
-					}
-					value &= 0xff;
-					break;
-				}
-			}
-			/* Fall through */
-			default:
-				warning(stream_pos(stream), "Unknown escape '%c'", value);
-			}
-		}
-		/* Mark it as escaped */
-		value |= 0x100;
-	}
-	*valp = value;
-	return next;
-}
-
-static int get_char_token(int next, stream_t *stream, enum token_type type)
-{
-	int value;
-	struct token *token;
-
-	next = escapechar(next, '\'', stream, &value);
-	if (value == '\'' || next != '\'') {
-		sparse_error(stream_pos(stream), "Bad character constant");
-		drop_token(stream);
-		return next;
-	}
-
-	token = stream->token;
-	token_type(token) = type;
-	token->character = value & 0xff;
-
-	add_token(stream);
-	return nextchar(stream);
-}
-
-static int get_string_token(int next, stream_t *stream, enum token_type type)
+/* Scan the body of a character or string literal whose opening delimiter was
+ * already consumed ("next" is its first char).  The spelling is stored raw;
+ * escapes are only validated.  Returns the next unread char, or EOF. */
+static int eat_string(int next, stream_t *stream, enum token_type type)
 {
 	static char buffer[MAX_STRING];
 	struct string *string;
-	struct token *token;
+	struct token *token = stream->token;
 	int len = 0;
+	int escape;
+	int want_hex = 0;
+	char delim = type < TOKEN_STRING ? '\'' : '"';
 
-	for (;;) {
-		int val;
-		next = escapechar(next, '"', stream, &val);
-		if (val == '"')
-			break;
+	for (escape = 0; escape || next != delim; next = nextchar(stream)) {
+		if (len < MAX_STRING)
+			buffer[len] = next;
+		len++;
+		if (next == '\n') {
+			warning(stream_pos(stream), "Newline in string or character constant");
+			if (delim == '\'') /* assume it's lost ' */
+				break;
+		}
 		if (next == EOF) {
-			warning(stream_pos(stream), "End of file in middle of string");
+			warning(stream_pos(stream),
+				"End of file in middle of string");
 			return next;
 		}
-		if (len < MAX_STRING)
-			buffer[len] = val;
-		len++;
+		if (!escape) {
+			if (want_hex && !(cclass[next + 1] & Hex))
+				warning(stream_pos(stream),
+					"\\x used with no following hex digits");
+			want_hex = 0;
+			escape = next == '\\';
+		} else {
+			if (!(cclass[next + 1] & Escape))
+				warning(stream_pos(stream), "Unknown escape '%c'", next);
+			escape = 0;
+			want_hex = next == 'x';
+		}
 	}
-
+	if (want_hex)	/* literal ended right after "\x" */
+		warning(stream_pos(stream), "\\x used with no following hex digits");
 	if (len > MAX_STRING) {
 		warning(stream_pos(stream), "string too long (%d bytes, %d bytes max)", len, MAX_STRING);
 		len = MAX_STRING;
 	}
-
-	string = __alloc_string(len+1);
-	memcpy(string->data, buffer, len);
-	string->data[len] = '\0';
-	string->length = len+1;
+	if (delim == '\'' && len <= 4) {
+		if (len == 0) {
+			sparse_error(stream_pos(stream), "empty character constant");
+			drop_token(stream);	/* discard the token that will not be emitted */
+			return nextchar(stream);
+		}
+		token_type(token) = type + len;	/* byte count is encoded in the type */
+		memset(buffer + len, '\0', 4 - len);
+		memcpy(token->embedded, buffer, 4);
+	} else {
+		token_type(token) = type;
+		string = __alloc_string(len+1);
+		memcpy(string->data, buffer, len);
+		string->data[len] = '\0';
+		string->length = len+1;
+		token->string = string;
+	}
 
 	/* Pass it on.. */
 	token = stream->token;
-	token_type(token) = type;
-	token->string = string;
 	add_token(stream);
-	
-	return next;
+	return nextchar(stream);
 }
 
 static int drop_stream_eoln(stream_t *stream)
@@ -725,9 +750,9 @@ static int get_one_special(int c, stream_t *stream)
 			return get_one_number(c, next, stream);
 		break;
 	case '"':
-		return get_string_token(next, stream, TOKEN_STRING);
+		return eat_string(next, stream, TOKEN_STRING);
 	case '\'':
-		return get_char_token(next, stream, TOKEN_CHAR);
+		return eat_string(next, stream, TOKEN_CHAR);
 	case '/':
 		if (next == '/')
 			return drop_stream_eoln(stream);
@@ -901,17 +926,19 @@ static int get_one_identifier(int c, stream_t *stream)
 		buf[len] = next;
 		len++;
 	};
+	if (cclass[next + 1] & Quote) {
+		if (len == 1 && buf[0] == 'L') {
+			if (next == '\'')
+				return eat_string(nextchar(stream), stream,
+							TOKEN_WIDE_CHAR);
+			else
+				return eat_string(nextchar(stream), stream,
+							TOKEN_WIDE_STRING);
+		}
+	}
 	hash = ident_hash_end(hash);
-
 	ident = create_hashed_ident(buf, len, hash);
 
-	if (ident == &L_ident) {
-		if (next == '\'')
-			return get_char_token(nextchar(stream), stream, TOKEN_WIDE_CHAR);
-		if (next == '\"')
-			return get_string_token(nextchar(stream), stream, TOKEN_WIDE_STRING);
-	}
-
 	/* Pass it on.. */
 	token = stream->token;
 	token_type(token) = TOKEN_IDENT;
diff --git a/validation/__func__.c b/validation/__func__.c
new file mode 100644
index 00000000..65ce9282
--- /dev/null
+++ b/validation/__func__.c
@@ -0,0 +1,15 @@
+static void f(void)
+{
+	char *s1 = __func__;	/* __func__ as an initializer: no diagnostic expected */
+	char arr[2 * (sizeof __func__ == 2) - 1];	/* size is 1 only if sizeof __func__ == 2 ("f" plus NUL), else -1 */
+	char *s2 = __func__ __func__;	/* must not concatenate like adjacent string literals: error expected here */
+}
+/*
+ * check-name: __func__
+ * check-command: sparse -Wall $file
+ *
+ * check-error-start
+__func__.c:5:29: error: Expected ; at end of declaration
+__func__.c:5:29: error: got __func__
+ * check-error-end
+ */
diff --git a/validation/escapes.c b/validation/escapes.c
index 13f8f9c8..4a1b030e 100644
--- a/validation/escapes.c
+++ b/validation/escapes.c
@@ -8,14 +8,13 @@ static int bad_e[] = { '\c', '\0123', '\789', '\xdefg' };
  * check-name: Character escape sequences
  *
  * check-error-start
-escapes.c:6:27: warning: Unknown escape 'c'
-escapes.c:6:35: error: Bad character constant
-escapes.c:6:38: error: Bad character constant
-escapes.c:6:42: error: Bad character constant
-escapes.c:6:46: error: Bad character constant
-escapes.c:6:53: error: Bad character constant
-escapes.c:6:56: error: Bad character constant
-escapes.c:6:42: error: Expected } at end of initializer
-escapes.c:6:42: error: got 89
+escapes.c:6:26: warning: Unknown escape 'c'
+escapes.c:3:34: warning: hex escape sequence out of range
+escapes.c:3:44: warning: hex escape sequence out of range
+escapes.c:4:18: warning: hex escape sequence out of range
+escapes.c:6:30: warning: multi-character character constant
+escapes.c:6:39: warning: multi-character character constant
+escapes.c:6:47: warning: hex escape sequence out of range
+escapes.c:6:47: warning: multi-character character constant
  * check-error-end
  */
diff --git a/validation/foul-bitwise.c b/validation/foul-bitwise.c
index 9e21eab7..4b542cf9 100644
--- a/validation/foul-bitwise.c
+++ b/validation/foul-bitwise.c
@@ -24,7 +24,9 @@ static __le16 bar(__le16 a)
  * check-error-start
 foul-bitwise.c:9:16: warning: restricted __le16 degrades to integer
 foul-bitwise.c:9:22: warning: restricted __le16 degrades to integer
-foul-bitwise.c:19:16: error: incompatible types for operation (-)
-foul-bitwise.c:19:16:    argument has type restricted __le16 [usertype] a
+foul-bitwise.c:19:16: warning: restricted __le16 degrades to integer
+foul-bitwise.c:19:16: warning: incorrect type in return expression (different base types)
+foul-bitwise.c:19:16:    expected restricted __le16
+foul-bitwise.c:19:16:    got int
  * check-error-end
  */
diff --git a/validation/preprocessor/preprocessor14.c b/validation/preprocessor/preprocessor14.c
index 05fc248b..027af040 100644
--- a/validation/preprocessor/preprocessor14.c
+++ b/validation/preprocessor/preprocessor14.c
@@ -7,7 +7,6 @@ A(,1)
 B(,1)
 /*
  * check-name: Preprocessor #14
- * check-known-to-fail
  * check-command: sparse -E $file
  *
  * check-output-start
diff --git a/validation/preprocessor/preprocessor23.c b/validation/preprocessor/preprocessor23.c
new file mode 100644
index 00000000..25be5085
--- /dev/null
+++ b/validation/preprocessor/preprocessor23.c
@@ -0,0 +1,47 @@
+#define H(x,...) ,##x##__VA_ARGS__##,##__VA_ARGS__
+H()
+H(x)
+H(,)
+H(x,)
+H(,x)
+H(x,x)
+#define I(x,...) ,##x##__VA_ARGS__
+I()
+I(x)
+I(,)
+I(x,)
+I(,x)
+I(x,x)
+/*
+ * check-name: Preprocessor #23
+ * check-command: sparse -E $file
+ *
+ * check-output-start
+
+,
+,x
+,,
+,x,
+,x,x
+,xx,x
+,x
+,
+,x
+,x
+,xx
+ * check-output-end
+ *
+ * check-error-start
+preprocessor/preprocessor23.c:3:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:4:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:5:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:5:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:6:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:6:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:7:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:7:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:10:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:12:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:14:1: error: '##' failed: concatenation is not a valid token
+ * check-error-end
+ */
diff --git a/validation/preprocessor/stringify.c b/validation/preprocessor/stringify.c
new file mode 100644
index 00000000..7fe965d5
--- /dev/null
+++ b/validation/preprocessor/stringify.c
@@ -0,0 +1,29 @@
+#define A(x) #x
+A('a')
+A("a")
+A(a)
+A(\n)
+A('\n')
+A("\n")
+A('"')
+A("a\nb")
+A(L"a\nb")
+A('\12')
+/*
+ * check-name: Preprocessor stringify
+ * check-command: sparse -E $file
+ *
+ * check-output-start
+
+"'a'"
+"\"a\""
+"a"
+"\n"
+"'\\n'"
+"\"\\n\""
+"'\"'"
+"\"a\\nb\""
+"L\"a\\nb\""
+"'\\12'"
+ * check-output-end
+ */
diff --git a/validation/preprocessor/wide.c b/validation/preprocessor/wide.c
new file mode 100644
index 00000000..21b643ce
--- /dev/null
+++ b/validation/preprocessor/wide.c
@@ -0,0 +1,15 @@
+#define A(x) L##x
+A('a')
+A("bc")
+/*
+ * check-name: wide char token-pasting
+ * check-description: Used to cause infinite recursion.
+ * check-command: sparse -E $file
+ *
+ * check-output-start
+
+L'a'
+L"bc"
+ * check-output-end
+ */
+
diff --git a/validation/wide.c b/validation/wide.c
new file mode 100644
index 00000000..847a680f
--- /dev/null
+++ b/validation/wide.c
@@ -0,0 +1,9 @@
+static char c = L'\x41';
+static int n = 1/(0x41 - L'\x41');
+/*
+ * check-name: wide character constants
+ *
+ * check-error-start
+wide.c:2:17: warning: division by zero
+ * check-error-end
+ */