author     Christopher Li <sparse@chrisli.org>  2013-02-12 23:01:45 -0800
committer  Christopher Li <sparse@chrisli.org>  2013-02-13 14:55:26 -0800
commit     1b8e012d10d2a5af2d4935e4a47df9c527399219 (patch)
tree       eb5c93ce49e6bc718f6da55fdb59c85df7a7f011
parent     6558e30ec635e26e767cee027936a0d0cae79bcb (diff)
parent     3dbed8ac24a2b4b24bc9776d89ea5328f1424a63 (diff)
download   sparse-dev-1b8e012d10d2a5af2d4935e4a47df9c527399219.tar.gz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/viro/sparse into marge
Pull preprocessor fix from Al Viro.

1) we really, really shouldn't convert escape sequences too early;
   #define A(x) #x
   A('\12')
   should yield "'\\12'", *not* "'\\n'".

2) literal merging handles all sequences of string/wide string literals; the result is wide if any of them is wide. string_expression() is handling that wrong - "ab"L"c" is L"abc".

3) with support (no matter how cursory) of wide char constants and wide string literals, we really ought to handle #define A(x,y) A(L,'a') properly; it's not that tricky - combine() needs to recognize <IDENT["L"],CHAR> and <IDENT["L"],STRING> pairs.

4) '\777' is an error, while L'\777' is valid - the value should fit into unsigned char or the unsigned counterpart of wchar_t. Note that for string literals this happens *after* phase 6 - what matters is the type of the literal after joining the adjacent ones (see (2) above).

5) stringifying should only quote \ and " in character constants and string literals;
   #define A(x) #x
   A(\n)
   should produce "\n", not "\\n".

6) we are losing L when stringifying wide string literals; that's wrong.

I have patches hopefully fixing the above. Basically, I delay interpreting escape sequences (past the bare minimum needed to find where the token ends) until we are handling an expression with a character constant or string literal in it. For character constants I'm keeping the token body in token->embedded - a 4-character array replacing token->character. That covers practically all realistic instances; a character constant *may* be longer than that, but it has to be something like '\x000000000000000000000000041' - sure, that's 100% legitimate C and it's going to be the same as '\x41' on everything, but when was the last time you saw something like that? So I've split TOKEN_CHAR into 5 values - TOKEN_CHAR+1 through TOKEN_CHAR+4 meaning 1 to 4 characters kept in ->embedded[], with TOKEN_CHAR itself used for the extremely rare cases longer than that (token->string holds the body in that case). TOKEN_WIDE_CHAR got the same treatment.

AFAICS, with those fixes we get the same behaviour as in gcc, except for bad escape sequences that are silently ignored by cpp if the string/char constant doesn't make it out of the preprocessor; sparse still warns about those. The situation with this one is frustrating; on one hand, C99 is saying that e.g. '\x' is not a token. Moreover, in a footnote in 6.4.4.4 it flat-out requires diagnostics for such. On the other hand... footnotes are informative-only, and having the "other character" token match ' would put us in nasal daemon country, so gcc is free to do whatever it feels like doing. I think we shouldn't play that kind of standard-lawyering *and* sparse has always warned on that, so I've left that warning in place.

Note that real wchar_t handling is still not there; at the very least, we need to decide what type will be used for that sucker (for gcc it's int on all targets we care about), fix the handling of wide string literals in initializers and in evaluate_string(), and stop dropping the upper bits in get_string_constant(). That would probably mean not using struct string for wide ones, as well... Hell knows; I don't want to touch that right now. If anything, I'd rather wait until we get to C11 support - they've got much saner variants of wide strings there (char16_t/char32_t with u and U as token prefixes, the way L is used for wchar_t; there's also u8"..." for UTF-8 strings).
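For reference (not part of the patch), the behaviours above in the form the new validation tests exercise; the macro names A and B are purely illustrative and the snippet is only meant to be fed through the preprocessor (sparse -E), like the tests:

#define A(x) #x
A('\12')        /* -E prints "'\\12'", not "'\\n'" - the escape is left alone      */
A(\n)           /* -E prints "\n", not "\\n" - stringify only quotes \ and "       */
A(L"a\nb")      /* -E prints "L\"a\\nb\"" - the L prefix is no longer dropped      */

#define B(x) L##x
B('a')          /* L'a'  - combine() now pastes IDENT "L" with a char constant     */
B("bc")         /* L"bc" - and with a string literal                               */

/* and in the compiler proper, "ab" L"c" now concatenates to the wide string L"abc" */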
-rw-r--r--  Makefile                                    2
-rw-r--r--  char.c                                    131
-rw-r--r--  char.h                                      2
-rw-r--r--  evaluate.c                                 26
-rw-r--r--  expression.c                              151
-rw-r--r--  lib.c                                      25
-rw-r--r--  lib.h                                       9
-rw-r--r--  pre-process.c                             322
-rw-r--r--  token.h                                     7
-rw-r--r--  tokenize.c                                393
-rw-r--r--  validation/__func__.c                      15
-rw-r--r--  validation/escapes.c                       17
-rw-r--r--  validation/foul-bitwise.c                   6
-rw-r--r--  validation/preprocessor/preprocessor14.c    1
-rw-r--r--  validation/preprocessor/preprocessor23.c   47
-rw-r--r--  validation/preprocessor/stringify.c        29
-rw-r--r--  validation/preprocessor/wide.c             15
-rw-r--r--  validation/wide.c                           9
18 files changed, 785 insertions, 422 deletions
diff --git a/Makefile b/Makefile
index 84e5df24..b195528e 100644
--- a/Makefile
+++ b/Makefile
@@ -93,7 +93,7 @@ LIB_H= token.h parse.h lib.h symbol.h scope.h expression.h target.h \
LIB_OBJS= target.o parse.o tokenize.o pre-process.o symbol.o lib.o scope.o \
expression.o show-parse.o evaluate.o expand.o inline.o linearize.o \
- sort.o allocate.o compat-$(OS).o ptrlist.o \
+ char.o sort.o allocate.o compat-$(OS).o ptrlist.o \
flow.o cse.o simplify.o memops.o liveness.o storage.o unssa.o dissect.o
LIB_FILE= libsparse.a
diff --git a/char.c b/char.c
new file mode 100644
index 00000000..92674565
--- /dev/null
+++ b/char.c
@@ -0,0 +1,131 @@
+#include <string.h>
+#include "target.h"
+#include "lib.h"
+#include "allocate.h"
+#include "token.h"
+#include "expression.h"
+
+static const char *parse_escape(const char *p, unsigned *val, const char *end, int bits, struct position pos)
+{
+ unsigned c = *p++;
+ unsigned d;
+ if (c != '\\') {
+ *val = c;
+ return p;
+ }
+
+ c = *p++;
+ switch (c) {
+ case 'a': c = '\a'; break;
+ case 'b': c = '\b'; break;
+ case 't': c = '\t'; break;
+ case 'n': c = '\n'; break;
+ case 'v': c = '\v'; break;
+ case 'f': c = '\f'; break;
+ case 'r': c = '\r'; break;
+ case 'e': c = '\e'; break;
+ case 'x': {
+ unsigned mask = -(1U << (bits - 4));
+ for (c = 0; p < end; c = (c << 4) + d) {
+ d = hexval(*p++);
+ if (d > 16)
+ break;
+ if (c & mask) {
+ warning(pos,
+ "hex escape sequence out of range");
+ mask = 0;
+ }
+ }
+ break;
+ }
+ case '0'...'7': {
+ if (p + 2 < end)
+ end = p + 2;
+ c -= '0';
+ while (p < end && (d = *p++ - '0') < 8)
+ c = (c << 3) + d;
+ if ((c & 0400) && bits < 9)
+ warning(pos,
+ "octal escape sequence out of range");
+ break;
+ }
+ default: /* everything else is left as is */
+ break;
+ }
+ *val = c & ~((~0U << (bits - 1)) << 1);
+ return p;
+}
+
+void get_char_constant(struct token *token, unsigned long long *val)
+{
+ const char *p = token->embedded, *end;
+ unsigned v;
+ int type = token_type(token);
+ switch (type) {
+ case TOKEN_CHAR:
+ case TOKEN_WIDE_CHAR:
+ p = token->string->data;
+ end = p + token->string->length;
+ break;
+ case TOKEN_CHAR + 1 ... TOKEN_CHAR + 4:
+ end = p + type - TOKEN_CHAR;
+ break;
+ default:
+ end = p + type - TOKEN_WIDE_CHAR;
+ }
+ p = parse_escape(p, &v, end,
+ type < TOKEN_WIDE_CHAR ? bits_in_char : 32, token->pos);
+ if (p != end)
+ warning(token->pos,
+ "multi-character character constant");
+ *val = v;
+}
+
+struct token *get_string_constant(struct token *token, struct expression *expr)
+{
+ struct string *string = token->string;
+ struct token *next = token->next, *done = NULL;
+ int stringtype = token_type(token);
+ int is_wide = stringtype == TOKEN_WIDE_STRING;
+ static char buffer[MAX_STRING];
+ int len = 0;
+ int bits;
+
+ while (!done) {
+ switch (token_type(next)) {
+ case TOKEN_WIDE_STRING:
+ is_wide = 1;
+ case TOKEN_STRING:
+ next = next->next;
+ break;
+ default:
+ done = next;
+ }
+ }
+ bits = is_wide ? 32 : bits_in_char;
+ while (token != done) {
+ unsigned v;
+ const char *p = token->string->data;
+ const char *end = p + token->string->length - 1;
+ while (p < end) {
+ p = parse_escape(p, &v, end, bits, token->pos);
+ if (len < MAX_STRING)
+ buffer[len] = v;
+ len++;
+ }
+ token = token->next;
+ }
+ if (len > MAX_STRING) {
+ warning(token->pos, "trying to concatenate %d-character string (%d bytes max)", len, MAX_STRING);
+ len = MAX_STRING;
+ }
+
+ if (len >= string->length) /* can't cannibalize */
+ string = __alloc_string(len+1);
+ string->length = len+1;
+ memcpy(string->data, buffer, len);
+ string->data[len] = '\0';
+ expr->string = string;
+ expr->wide = is_wide;
+ return token;
+}
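A minimal standalone sketch (not the code above; the helper name is made up) of what delaying escape interpretation buys: the same octal body can be decoded against different bit widths, 8 for a plain char constant and 32 for a wide one, using the same range check and masking as parse_escape():

#include <stdio.h>

static unsigned decode_octal(const char *s, int n, int bits)
{
	unsigned v = 0;
	int i;

	for (i = 0; i < n && s[i] >= '0' && s[i] <= '7'; i++)
		v = (v << 3) + (s[i] - '0');
	if ((v & 0400) && bits < 9)
		fprintf(stderr, "octal escape sequence out of range\n");
	/* keep only the bits that fit in the target type */
	return v & ~((~0U << (bits - 1)) << 1);
}

int main(void)
{
	printf("%#o\n", decode_octal("777", 3, 8));	/* warns, prints 0377 */
	printf("%#o\n", decode_octal("777", 3, 32));	/* fine for L'\777', prints 0777 */
	return 0;
}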
diff --git a/char.h b/char.h
new file mode 100644
index 00000000..54be6b74
--- /dev/null
+++ b/char.h
@@ -0,0 +1,2 @@
+extern void get_char_constant(struct token *, unsigned long long *);
+extern struct token *get_string_constant(struct token *, struct expression *);
diff --git a/evaluate.c b/evaluate.c
index 0987a5e5..d09f271a 100644
--- a/evaluate.c
+++ b/evaluate.c
@@ -1696,16 +1696,20 @@ static struct symbol *evaluate_postop(struct expression *expr)
{
struct expression *op = expr->unop;
struct symbol *ctype = op->ctype;
- int class = classify_type(op->ctype, &ctype);
+ int class = classify_type(ctype, &ctype);
int multiply = 0;
+ if (!class || class & TYPE_COMPOUND) {
+ expression_error(expr, "need scalar for ++/--");
+ return NULL;
+ }
if (!lvalue_expression(expr->unop)) {
expression_error(expr, "need lvalue expression for ++/--");
return NULL;
}
if ((class & TYPE_RESTRICT) && restricted_unop(expr->op, &ctype))
- return bad_expr_type(expr);
+ unrestrict(expr, class, &ctype);
if (class & TYPE_NUM) {
multiply = 1;
@@ -1735,13 +1739,13 @@ static struct symbol *evaluate_sign(struct expression *expr)
/* should be an arithmetic type */
if (!(class & TYPE_NUM))
return bad_expr_type(expr);
- if (!(class & (TYPE_FLOAT|TYPE_RESTRICT))) {
- struct symbol *rtype = integer_promotion(ctype);
- expr->unop = cast_to(expr->unop, rtype);
- ctype = rtype;
- } else if ((class & TYPE_FLOAT) && expr->op != '~') {
- /* no conversions needed */
- } else if ((class & TYPE_RESTRICT) && !restricted_unop(expr->op, &ctype)) {
+ if (class & TYPE_RESTRICT)
+ goto Restr;
+Normal:
+ if (!(class & TYPE_FLOAT)) {
+ ctype = integer_promotion(ctype);
+ expr->unop = cast_to(expr->unop, ctype);
+ } else if (expr->op != '~') {
/* no conversions needed */
} else {
return bad_expr_type(expr);
@@ -1750,6 +1754,10 @@ static struct symbol *evaluate_sign(struct expression *expr)
*expr = *expr->unop;
expr->ctype = ctype;
return ctype;
+Restr:
+ if (restricted_unop(expr->op, &ctype))
+ unrestrict(expr, class, &ctype);
+ goto Normal;
}
static struct symbol *evaluate_preop(struct expression *expr)
diff --git a/expression.c b/expression.c
index 9f45c794..d2437c74 100644
--- a/expression.c
+++ b/expression.c
@@ -26,6 +26,7 @@
#include "scope.h"
#include "expression.h"
#include "target.h"
+#include "char.h"
static int match_oplist(int op, ...)
{
@@ -64,53 +65,50 @@ struct token *parens_expression(struct token *token, struct expression **expr, c
* Handle __func__, __FUNCTION__ and __PRETTY_FUNCTION__ token
* conversion
*/
-static int convert_one_fn_token(struct token *token)
+static struct symbol *handle_func(struct token *token)
{
- struct symbol *sym = current_fn;
-
- if (sym) {
- struct ident *ident = sym->ident;
- if (ident) {
- int len = ident->len;
- struct string *string;
-
- string = __alloc_string(len+1);
- memcpy(string->data, ident->name, len);
- string->data[len] = 0;
- string->length = len+1;
- token_type(token) = TOKEN_STRING;
- token->string = string;
- return 1;
- }
- }
- return 0;
-}
-
-static int convert_function(struct token *next)
-{
- int retval = 0;
- for (;;) {
- struct token *token = next;
- next = next->next;
- switch (token_type(token)) {
- case TOKEN_STRING:
- continue;
- case TOKEN_IDENT:
- if (token->ident == &__func___ident ||
- token->ident == &__FUNCTION___ident ||
- token->ident == &__PRETTY_FUNCTION___ident) {
- if (!convert_one_fn_token(token))
- break;
- retval = 1;
- continue;
- }
- /* Fall through */
- default:
- break;
- }
- break;
- }
- return retval;
+ struct ident *ident = token->ident;
+ struct symbol *decl, *array;
+ struct string *string;
+ int len;
+
+ if (ident != &__func___ident &&
+ ident != &__FUNCTION___ident &&
+ ident != &__PRETTY_FUNCTION___ident)
+ return NULL;
+
+ if (!current_fn)
+ return NULL;
+
+ /* OK, it's one of ours */
+ array = alloc_symbol(token->pos, SYM_ARRAY);
+ array->ctype.base_type = &char_ctype;
+ array->ctype.alignment = 1;
+ array->endpos = token->pos;
+ decl = alloc_symbol(token->pos, SYM_NODE);
+ decl->ctype.base_type = array;
+ decl->ctype.alignment = 1;
+ decl->ctype.modifiers = MOD_STATIC;
+ decl->endpos = token->pos;
+
+ /* function-scope, but in NS_SYMBOL */
+ bind_symbol(decl, ident, NS_LABEL);
+ decl->namespace = NS_SYMBOL;
+
+ len = current_fn->ident->len;
+ string = __alloc_string(len + 1);
+ memcpy(string->data, current_fn->ident->name, len);
+ string->data[len] = 0;
+ string->length = len + 1;
+
+ decl->initializer = alloc_expression(token->pos, EXPR_STRING);
+ decl->initializer->string = string;
+ decl->initializer->ctype = decl;
+ decl->array_size = alloc_const_expression(token->pos, len + 1);
+ array->array_size = decl->array_size;
+ decl->bit_size = array->bit_size = bytes_to_bits(len + 1);
+
+ return decl;
}
static struct token *parse_type(struct token *token, struct expression **tree)
@@ -220,50 +218,6 @@ static struct token *builtin_offsetof_expr(struct token *token,
}
}
-static struct token *string_expression(struct token *token, struct expression *expr)
-{
- struct string *string = token->string;
- struct token *next = token->next;
- int stringtype = token_type(token);
-
- convert_function(token);
-
- if (token_type(next) == stringtype) {
- int totlen = string->length-1;
- char *data;
-
- do {
- totlen += next->string->length-1;
- next = next->next;
- } while (token_type(next) == stringtype);
-
- if (totlen > MAX_STRING) {
- warning(token->pos, "trying to concatenate %d-character string (%d bytes max)", totlen, MAX_STRING);
- totlen = MAX_STRING;
- }
-
- string = __alloc_string(totlen+1);
- string->length = totlen+1;
- data = string->data;
- next = token;
- do {
- struct string *s = next->string;
- int len = s->length-1;
-
- if (len > totlen)
- len = totlen;
- totlen -= len;
-
- next = next->next;
- memcpy(data, s->data, len);
- data += len;
- } while (token_type(next) == stringtype);
- *data = '\0';
- }
- expr->string = string;
- return next;
-}
-
#ifndef ULLONG_MAX
#define ULLONG_MAX (~0ULL)
#endif
@@ -404,12 +358,11 @@ struct token *primary_expression(struct token *token, struct expression **tree)
struct expression *expr = NULL;
switch (token_type(token)) {
- case TOKEN_CHAR:
- case TOKEN_WIDE_CHAR:
+ case TOKEN_CHAR ... TOKEN_WIDE_CHAR + 4:
expr = alloc_expression(token->pos, EXPR_VALUE);
expr->flags = Int_const_expr;
- expr->ctype = token_type(token) == TOKEN_CHAR ? &int_ctype : &long_ctype;
- expr->value = (unsigned char) token->character;
+ expr->ctype = token_type(token) < TOKEN_WIDE_CHAR ? &int_ctype : &long_ctype;
+ get_char_constant(token, &expr->value);
token = token->next;
break;
@@ -434,8 +387,7 @@ struct token *primary_expression(struct token *token, struct expression **tree)
struct token *next = token->next;
if (!sym) {
- if (convert_function(token))
- goto handle_string;
+ sym = handle_func(token);
if (token->ident == &__builtin_types_compatible_p_ident) {
token = builtin_types_compatible_p_expr(token, &expr);
break;
@@ -473,13 +425,10 @@ struct token *primary_expression(struct token *token, struct expression **tree)
}
case TOKEN_STRING:
- case TOKEN_WIDE_STRING: {
- handle_string:
+ case TOKEN_WIDE_STRING:
expr = alloc_expression(token->pos, EXPR_STRING);
- expr->wide = token_type(token) == TOKEN_WIDE_STRING;
- token = string_expression(token, expr);
+ token = get_string_constant(token, expr);
break;
- }
case TOKEN_SPECIAL:
if (token->special == '(') {
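On the __func__ side, the net effect of handle_func() is the C99 semantics rather than rewriting the token into a string literal; roughly (the snippet below only illustrates those semantics, it is not the sparse code):

void f(void)
{
	/*
	 * C99 6.4.2.2: as if the translator had declared, right after the
	 * opening brace:
	 *     static const char __func__[] = "f";
	 * The patch models that with a SYM_NODE/SYM_ARRAY pair, so __func__
	 * no longer concatenates with adjacent string literals (see
	 * validation/__func__.c below).
	 */
	const char *s1 = __func__;		/* OK */
	/* const char *s2 = "x" __func__;	   error: not a string literal any more */
	(void)s1;
}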
diff --git a/lib.c b/lib.c
index bb814f2e..6bd10d36 100644
--- a/lib.c
+++ b/lib.c
@@ -234,8 +234,8 @@ int arch_m64 = ARCH_M64_DEFAULT;
int arch_msize_long = 0;
#define CMDLINE_INCLUDE 20
-int cmdline_include_nr = 0;
-struct cmdline_include cmdline_include[CMDLINE_INCLUDE];
+static int cmdline_include_nr = 0;
+static char *cmdline_include[CMDLINE_INCLUDE];
void add_pre_buffer(const char *fmt, ...)
@@ -308,16 +308,9 @@ static char **handle_switch_I(char *arg, char **next)
static void add_cmdline_include(char *filename)
{
- int fd = open(filename, O_RDONLY);
- if (fd < 0) {
- perror(filename);
- return;
- }
if (cmdline_include_nr >= CMDLINE_INCLUDE)
die("too many include files for %s\n", filename);
- cmdline_include[cmdline_include_nr].filename = filename;
- cmdline_include[cmdline_include_nr].fd = fd;
- cmdline_include_nr++;
+ cmdline_include[cmdline_include_nr++] = filename;
}
static char **handle_switch_i(char *arg, char **next)
@@ -930,19 +923,13 @@ static struct symbol_list *sparse_file(const char *filename)
*/
static struct symbol_list *sparse_initial(void)
{
- struct token *token;
int i;
// Prepend any "include" file to the stream.
// We're in global scope, it will affect all files!
- token = NULL;
- for (i = cmdline_include_nr - 1; i >= 0; i--)
- token = tokenize(cmdline_include[i].filename, cmdline_include[i].fd,
- token, includepath);
-
- // Prepend the initial built-in stream
- if (token)
- pre_buffer_end->next = token;
+ for (i = 0; i < cmdline_include_nr; i++)
+ add_pre_buffer("#argv_include \"%s\"\n", cmdline_include[i]);
+
return sparse_tokenstream(pre_buffer_begin);
}
diff --git a/lib.h b/lib.h
index 2cea2520..ee954fed 100644
--- a/lib.h
+++ b/lib.h
@@ -41,15 +41,6 @@ struct position {
noexpand:1;
};
-struct cmdline_include {
- char *filename;
- int fd;
-};
-
-extern struct cmdline_include cmdline_include[];
-extern int cmdline_include_nr;
-
-
struct ident;
struct token;
struct symbol;
diff --git a/pre-process.c b/pre-process.c
index 8a16f8b3..e5f56b40 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -82,8 +82,6 @@ static struct token *alloc_token(struct position *pos)
return token;
}
-static const char *show_token_sequence(struct token *token);
-
/* Expand symbol 'sym' at '*list' */
static int expand(struct token **, struct symbol *);
@@ -340,9 +338,35 @@ static struct token *dup_list(struct token *list)
return res;
}
+static const char *quote_token_sequence(struct token *token)
+{
+ static char buffer[1024];
+ char *ptr = buffer;
+ int whitespace = 0;
+
+ while (!eof_token(token)) {
+ const char *val = quote_token(token);
+ int len = strlen(val);
+
+ if (ptr + whitespace + len >= buffer + sizeof(buffer)) {
+ sparse_error(token->pos, "too long token expansion");
+ break;
+ }
+
+ if (whitespace)
+ *ptr++ = ' ';
+ memcpy(ptr, val, len);
+ ptr += len;
+ token = token->next;
+ whitespace = token->pos.whitespace;
+ }
+ *ptr = 0;
+ return buffer;
+}
+
static struct token *stringify(struct token *arg)
{
- const char *s = show_token_sequence(arg);
+ const char *s = quote_token_sequence(arg);
int size = strlen(s)+1;
struct token *token = __alloc_token(0);
struct string *string = __alloc_string(size);
@@ -383,6 +407,8 @@ static void expand_arguments(int count, struct arg *args)
* Possibly valid combinations:
* - ident + ident -> ident
* - ident + number -> ident unless number contains '.', '+' or '-'.
+ * - 'L' + char constant -> wide char constant
+ * - 'L' + string literal -> wide string literal
* - number + number -> number
* - number + ident -> number
* - number + '.' -> number
@@ -398,6 +424,13 @@ static enum token_type combine(struct token *left, struct token *right, char *p)
if (t1 != TOKEN_IDENT && t1 != TOKEN_NUMBER && t1 != TOKEN_SPECIAL)
return TOKEN_ERROR;
+ if (t1 == TOKEN_IDENT && left->ident == &L_ident) {
+ if (t2 >= TOKEN_CHAR && t2 < TOKEN_WIDE_CHAR)
+ return t2 + TOKEN_WIDE_CHAR - TOKEN_CHAR;
+ if (t2 == TOKEN_STRING)
+ return TOKEN_WIDE_STRING;
+ }
+
if (t2 != TOKEN_IDENT && t2 != TOKEN_NUMBER && t2 != TOKEN_SPECIAL)
return TOKEN_ERROR;
@@ -440,9 +473,10 @@ static enum token_type combine(struct token *left, struct token *right, char *p)
static int merge(struct token *left, struct token *right)
{
static char buffer[512];
+ enum token_type res = combine(left, right, buffer);
int n;
- switch (combine(left, right, buffer)) {
+ switch (res) {
case TOKEN_IDENT:
left->ident = built_in_ident(buffer);
left->pos.noexpand = 0;
@@ -465,6 +499,21 @@ static int merge(struct token *left, struct token *right)
return 1;
}
}
+ break;
+
+ case TOKEN_WIDE_CHAR:
+ case TOKEN_WIDE_STRING:
+ token_type(left) = res;
+ left->pos.noexpand = 0;
+ left->string = right->string;
+ return 1;
+
+ case TOKEN_WIDE_CHAR + 1 ... TOKEN_WIDE_CHAR + 4:
+ token_type(left) = res;
+ left->pos.noexpand = 0;
+ memcpy(left->embedded, right->embedded, 4);
+ return 1;
+
default:
;
}
@@ -472,12 +521,12 @@ static int merge(struct token *left, struct token *right)
return 0;
}
-static struct token *dup_token(struct token *token, struct position *streampos, struct position *pos)
+static struct token *dup_token(struct token *token, struct position *streampos)
{
struct token *alloc = alloc_token(streampos);
token_type(alloc) = token_type(token);
- alloc->pos.newline = pos->newline;
- alloc->pos.whitespace = pos->whitespace;
+ alloc->pos.newline = token->pos.newline;
+ alloc->pos.whitespace = token->pos.whitespace;
alloc->number = token->number;
alloc->pos.noexpand = token->pos.noexpand;
return alloc;
@@ -489,7 +538,7 @@ static struct token **copy(struct token **where, struct token *list, int *count)
while (!eof_token(list)) {
struct token *token;
if (need_copy)
- token = dup_token(list, &list->pos, &list->pos);
+ token = dup_token(list, &list->pos);
else
token = list;
if (token_type(token) == TOKEN_IDENT && token->ident->tainted)
@@ -502,17 +551,37 @@ static struct token **copy(struct token **where, struct token *list, int *count)
return where;
}
+static int handle_kludge(struct token **p, struct arg *args)
+{
+ struct token *t = (*p)->next->next;
+ while (1) {
+ struct arg *v = &args[t->argnum];
+ if (token_type(t->next) != TOKEN_CONCAT) {
+ if (v->arg) {
+ /* ignore the first ## */
+ *p = (*p)->next;
+ return 0;
+ }
+ /* skip the entire thing */
+ *p = t;
+ return 1;
+ }
+ if (v->arg && !eof_token(v->arg))
+ return 0; /* no magic */
+ t = t->next->next;
+ }
+}
+
static struct token **substitute(struct token **list, struct token *body, struct arg *args)
{
- struct token *token = *list;
- struct position *base_pos = &token->pos;
- struct position *pos = base_pos;
+ struct position *base_pos = &(*list)->pos;
int *count;
enum {Normal, Placeholder, Concat} state = Normal;
- for (; !eof_token(body); body = body->next, pos = &body->pos) {
+ for (; !eof_token(body); body = body->next) {
struct token *added, *arg;
struct token **tail;
+ struct token *t;
switch (token_type(body)) {
case TOKEN_GNU_KLUDGE:
@@ -520,13 +589,20 @@ static struct token **substitute(struct token **list, struct token *body, struct
* GNU kludge: if we had <comma>##<vararg>, behaviour
* depends on whether we had enough arguments to have
* a vararg. If we did, ## is just ignored. Otherwise
- * both , and ## are ignored. Comma should come from
- * the body of macro and not be an argument of earlier
- * concatenation.
+ * both , and ## are ignored. Worse, there can be
+ * an arbitrary number of ##<arg> in between; if all of
+ * those are empty, we act as if they hadn't been there,
+ * otherwise we act as if the kludge didn't exist.
*/
- if (!args[body->next->argnum].arg)
+ t = body;
+ if (handle_kludge(&body, args)) {
+ if (state == Concat)
+ state = Normal;
+ else
+ state = Placeholder;
continue;
- added = dup_token(body, base_pos, pos);
+ }
+ added = dup_token(t, base_pos);
token_type(added) = TOKEN_SPECIAL;
tail = &added->next;
break;
@@ -557,8 +633,8 @@ static struct token **substitute(struct token **list, struct token *body, struct
}
copy_arg:
tail = copy(&added, arg, count);
- added->pos.newline = pos->newline;
- added->pos.whitespace = pos->whitespace;
+ added->pos.newline = body->pos.newline;
+ added->pos.whitespace = body->pos.whitespace;
break;
case TOKEN_CONCAT:
@@ -569,14 +645,14 @@ static struct token **substitute(struct token **list, struct token *body, struct
continue;
case TOKEN_IDENT:
- added = dup_token(body, base_pos, pos);
+ added = dup_token(body, base_pos);
if (added->ident->tainted)
added->pos.noexpand = 1;
tail = &added->next;
break;
default:
- added = dup_token(body, base_pos, pos);
+ added = dup_token(body, base_pos);
tail = &added->next;
break;
}
@@ -625,6 +701,14 @@ static int expand(struct token **list, struct symbol *sym)
last = token->next;
tail = substitute(list, sym->expansion, args);
+ /*
+ * Note that it won't be eof - at least TOKEN_UNTAINT will be there.
+ * We still can lose the newline flag if the sucker expands to nothing,
+ * but the price of dealing with that is probably too high (we'd need
+ * to collect the flags during scan_next())
+ */
+ (*list)->pos.newline = token->pos.newline;
+ (*list)->pos.whitespace = token->pos.whitespace;
*tail = last;
return 0;
@@ -767,31 +851,6 @@ static int do_include_path(const char **pptr, struct token **list, struct token
return 0;
}
-static void do_include(int local, struct stream *stream, struct token **list, struct token *token, const char *filename, const char **path)
-{
- int flen = strlen(filename) + 1;
-
- /* Absolute path? */
- if (filename[0] == '/') {
- if (try_include("", filename, flen, list, includepath))
- return;
- goto out;
- }
-
- /* Dir of input file is first dir to search for quoted includes */
- set_stream_include_path(stream);
-
- if (!path)
- /* Do not search quote include if <> is in use */
- path = local ? quote_includepath : angle_includepath;
-
- /* Check the standard include paths.. */
- if (do_include_path(path, list, token, filename, flen))
- return;
-out:
- error_die(token->pos, "unable to open '%s'", filename);
-}
-
static int free_preprocessor_line(struct token *token)
{
while (token_type(token) != TOKEN_EOF) {
@@ -802,11 +861,13 @@ static int free_preprocessor_line(struct token *token)
return 1;
}
-static int handle_include_path(struct stream *stream, struct token **list, struct token *token, const char **path)
+static int handle_include_path(struct stream *stream, struct token **list, struct token *token, int how)
{
const char *filename;
struct token *next;
+ const char **path;
int expect;
+ int flen;
next = token->next;
expect = '>';
@@ -819,20 +880,52 @@ static int handle_include_path(struct stream *stream, struct token **list, struc
expect = '>';
}
}
+
token = next->next;
filename = token_name_sequence(token, expect, token);
- do_include(!expect, stream, list, token, filename, path);
- return 0;
+ flen = strlen(filename) + 1;
+
+ /* Absolute path? */
+ if (filename[0] == '/') {
+ if (try_include("", filename, flen, list, includepath))
+ return 0;
+ goto out;
+ }
+
+ switch (how) {
+ case 1:
+ path = stream->next_path;
+ break;
+ case 2:
+ includepath[0] = "";
+ path = includepath;
+ break;
+ default:
+ /* Dir of input file is first dir to search for quoted includes */
+ set_stream_include_path(stream);
+ path = expect ? angle_includepath : quote_includepath;
+ break;
+ }
+ /* Check the standard include paths.. */
+ if (do_include_path(path, list, token, filename, flen))
+ return 0;
+out:
+ error_die(token->pos, "unable to open '%s'", filename);
}
static int handle_include(struct stream *stream, struct token **list, struct token *token)
{
- return handle_include_path(stream, list, token, NULL);
+ return handle_include_path(stream, list, token, 0);
}
static int handle_include_next(struct stream *stream, struct token **list, struct token *token)
{
- return handle_include_path(stream, list, token, stream->next_path);
+ return handle_include_path(stream, list, token, 1);
+}
+
+static int handle_argv_include(struct stream *stream, struct token **list, struct token *token)
+{
+ return handle_include_path(stream, list, token, 2);
}
static int token_different(struct token *t1, struct token *t2)
@@ -863,10 +956,12 @@ static int token_different(struct token *t1, struct token *t2)
case TOKEN_STR_ARGUMENT:
different = t1->argnum != t2->argnum;
break;
+ case TOKEN_CHAR + 1 ... TOKEN_CHAR + 4:
+ case TOKEN_WIDE_CHAR + 1 ... TOKEN_WIDE_CHAR + 4:
+ different = memcmp(t1->embedded, t2->embedded, 4);
+ break;
case TOKEN_CHAR:
case TOKEN_WIDE_CHAR:
- different = t1->character != t2->character;
- break;
case TOKEN_STRING:
case TOKEN_WIDE_STRING: {
struct string *s1, *s2;
@@ -1035,6 +1130,10 @@ static int try_arg(struct token *token, enum token_type type, struct token *argl
}
if (n)
return count->vararg ? 2 : 1;
+ /*
+ * XXX - need saner handling of that
+ * (>= 1024 instances of argument)
+ */
token_type(token) = TOKEN_ERROR;
return -1;
}
@@ -1042,49 +1141,103 @@ static int try_arg(struct token *token, enum token_type type, struct token *argl
return 0;
}
+static struct token *handle_hash(struct token **p, struct token *arglist)
+{
+ struct token *token = *p;
+ if (arglist) {
+ struct token *next = token->next;
+ if (!try_arg(next, TOKEN_STR_ARGUMENT, arglist))
+ goto Equote;
+ next->pos.whitespace = token->pos.whitespace;
+ __free_token(token);
+ token = *p = next;
+ } else {
+ token->pos.noexpand = 1;
+ }
+ return token;
+
+Equote:
+ sparse_error(token->pos, "'#' is not followed by a macro parameter");
+ return NULL;
+}
+
+/* token->next is ## */
+static struct token *handle_hashhash(struct token *token, struct token *arglist)
+{
+ struct token *last = token;
+ struct token *concat;
+ int state = match_op(token, ',');
+
+ try_arg(token, TOKEN_QUOTED_ARGUMENT, arglist);
+
+ while (1) {
+ struct token *t;
+ int is_arg;
+
+ /* eat duplicate ## */
+ concat = token->next;
+ while (match_op(t = concat->next, SPECIAL_HASHHASH)) {
+ token->next = t;
+ __free_token(concat);
+ concat = t;
+ }
+ token_type(concat) = TOKEN_CONCAT;
+
+ if (eof_token(t))
+ goto Econcat;
+
+ if (match_op(t, '#')) {
+ t = handle_hash(&concat->next, arglist);
+ if (!t)
+ return NULL;
+ }
+
+ is_arg = try_arg(t, TOKEN_QUOTED_ARGUMENT, arglist);
+
+ if (state == 1 && is_arg) {
+ state = is_arg;
+ } else {
+ last = t;
+ state = match_op(t, ',');
+ }
+
+ token = t;
+ if (!match_op(token->next, SPECIAL_HASHHASH))
+ break;
+ }
+ /* handle GNU ,##__VA_ARGS__ kludge, in all its weirdness */
+ if (state == 2)
+ token_type(last) = TOKEN_GNU_KLUDGE;
+ return token;
+
+Econcat:
+ sparse_error(concat->pos, "'##' cannot appear at the ends of macro expansion");
+ return NULL;
+}
+
static struct token *parse_expansion(struct token *expansion, struct token *arglist, struct ident *name)
{
struct token *token = expansion;
struct token **p;
- struct token *last = NULL;
if (match_op(token, SPECIAL_HASHHASH))
goto Econcat;
for (p = &expansion; !eof_token(token); p = &token->next, token = *p) {
if (match_op(token, '#')) {
- if (arglist) {
- struct token *next = token->next;
- if (!try_arg(next, TOKEN_STR_ARGUMENT, arglist))
- goto Equote;
- next->pos.whitespace = token->pos.whitespace;
- token = *p = next;
- } else {
- token->pos.noexpand = 1;
- }
- } else if (match_op(token, SPECIAL_HASHHASH)) {
- struct token *next = token->next;
- int arg = try_arg(next, TOKEN_QUOTED_ARGUMENT, arglist);
- token_type(token) = TOKEN_CONCAT;
- if (arg) {
- token = next;
- /* GNU kludge */
- if (arg == 2 && last && match_op(last, ',')) {
- token_type(last) = TOKEN_GNU_KLUDGE;
- last->next = token;
- }
- } else if (match_op(next, SPECIAL_HASHHASH))
- token = next;
- else if (eof_token(next))
- goto Econcat;
- } else if (match_op(token->next, SPECIAL_HASHHASH)) {
- try_arg(token, TOKEN_QUOTED_ARGUMENT, arglist);
+ token = handle_hash(p, arglist);
+ if (!token)
+ return NULL;
+ }
+ if (match_op(token->next, SPECIAL_HASHHASH)) {
+ token = handle_hashhash(token, arglist);
+ if (!token)
+ return NULL;
} else {
try_arg(token, TOKEN_MACRO_ARGUMENT, arglist);
}
if (token_type(token) == TOKEN_ERROR)
goto Earg;
- last = token;
}
token = alloc_token(&expansion->pos);
token_type(token) = TOKEN_UNTAINT;
@@ -1093,10 +1246,6 @@ static struct token *parse_expansion(struct token *expansion, struct token *argl
*p = token;
return expansion;
-Equote:
- sparse_error(token->pos, "'#' is not followed by a macro parameter");
- return NULL;
-
Econcat:
sparse_error(token->pos, "'##' cannot appear at the ends of macro expansion");
return NULL;
@@ -1287,6 +1436,8 @@ static int handle_ifndef(struct stream *stream, struct token **line, struct toke
return preprocessor_if(stream, token, arg);
}
+static const char *show_token_sequence(struct token *token);
+
/*
* Expression handling for #if and #elif; it differs from normal expansion
* due to special treatment of "defined".
@@ -1709,6 +1860,7 @@ static void init_preprocessor(void)
{ "add_system", handle_add_system },
{ "add_dirafter", handle_add_dirafter },
{ "split_include", handle_split_include },
+ { "argv_include", handle_argv_include },
}, special[] = {
{ "ifdef", handle_ifdef },
{ "ifndef", handle_ifndef },
diff --git a/token.h b/token.h
index cd292331..20c23268 100644
--- a/token.h
+++ b/token.h
@@ -68,8 +68,8 @@ enum token_type {
TOKEN_ZERO_IDENT,
TOKEN_NUMBER,
TOKEN_CHAR,
- TOKEN_WIDE_CHAR,
- TOKEN_STRING,
+ TOKEN_WIDE_CHAR = TOKEN_CHAR + 5,
+ TOKEN_STRING = TOKEN_WIDE_CHAR + 5,
TOKEN_WIDE_STRING,
TOKEN_SPECIAL,
TOKEN_STREAMBEGIN,
@@ -165,9 +165,9 @@ struct token {
struct ident *ident;
unsigned int special;
struct string *string;
- int character;
int argnum;
struct argcount count;
+ char embedded[4];
};
};
@@ -198,6 +198,7 @@ extern const char *show_special(int);
extern const char *show_ident(const struct ident *);
extern const char *show_string(const struct string *string);
extern const char *show_token(const struct token *);
+extern const char *quote_token(const struct token *);
extern struct token * tokenize(const char *, int, struct token *, const char **next_path);
extern struct token * tokenize_buffer(void *, unsigned long, struct token **);
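The new TOKEN_CHAR / TOKEN_WIDE_CHAR layout packs the body length of short character constants into the token type itself; a hypothetical helper (not in the patch, assumes token.h) that reads it back could look like this:

static int char_body_length(const struct token *token)
{
	int type = token_type(token);

	if (type > TOKEN_CHAR && type <= TOKEN_CHAR + 4)
		return type - TOKEN_CHAR;	/* 1..4 bytes kept in token->embedded[] */
	if (type > TOKEN_WIDE_CHAR && type <= TOKEN_WIDE_CHAR + 4)
		return type - TOKEN_WIDE_CHAR;	/* same encoding for wide constants */
	return token->string->length - 1;	/* rare long form keeps token->string */
}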
diff --git a/tokenize.c b/tokenize.c
index d4f05e56..95f308e0 100644
--- a/tokenize.c
+++ b/tokenize.c
@@ -121,6 +121,42 @@ const char *show_string(const struct string *string)
return buffer;
}
+static const char *show_char(const char *s, size_t len, char prefix, char delim)
+{
+ static char buffer[MAX_STRING + 4];
+ char *p = buffer;
+ if (prefix)
+ *p++ = prefix;
+ *p++ = delim;
+ memcpy(p, s, len);
+ p += len;
+ *p++ = delim;
+ *p++ = '\0';
+ return buffer;
+}
+
+static const char *quote_char(const char *s, size_t len, char prefix, char delim)
+{
+ static char buffer[2*MAX_STRING + 6];
+ size_t i;
+ char *p = buffer;
+ if (prefix)
+ *p++ = prefix;
+ if (delim == '"')
+ *p++ = '\\';
+ *p++ = delim;
+ for (i = 0; i < len; i++) {
+ if (s[i] == '"' || s[i] == '\\')
+ *p++ = '\\';
+ *p++ = s[i];
+ }
+ if (delim == '"')
+ *p++ = '\\';
+ *p++ = delim;
+ *p++ = '\0';
+ return buffer;
+}
+
const char *show_token(const struct token *token)
{
static char buffer[256];
@@ -137,10 +173,6 @@ const char *show_token(const struct token *token)
case TOKEN_IDENT:
return show_ident(token->ident);
- case TOKEN_STRING:
- case TOKEN_WIDE_STRING:
- return show_string(token->string);
-
case TOKEN_NUMBER:
return token->number;
@@ -148,15 +180,23 @@ const char *show_token(const struct token *token)
return show_special(token->special);
case TOKEN_CHAR:
- case TOKEN_WIDE_CHAR: {
- char *ptr = buffer;
- int c = token->character;
- *ptr++ = '\'';
- ptr = charstr(ptr, c, '\'', 0);
- *ptr++ = '\'';
- *ptr++ = '\0';
- return buffer;
- }
+ return show_char(token->string->data,
+ token->string->length - 1, 0, '\'');
+ case TOKEN_CHAR+1 ... TOKEN_CHAR+4:
+ return show_char(token->embedded,
+ token_type(token) - TOKEN_CHAR, 0, '\'');
+ case TOKEN_WIDE_CHAR:
+ return show_char(token->string->data,
+ token->string->length - 1, 'L', '\'');
+ case TOKEN_WIDE_CHAR+1 ... TOKEN_WIDE_CHAR+4:
+ return show_char(token->embedded,
+ token_type(token) - TOKEN_WIDE_CHAR, 'L', '\'');
+ case TOKEN_STRING:
+ return show_char(token->string->data,
+ token->string->length - 1, 0, '"');
+ case TOKEN_WIDE_STRING:
+ return show_char(token->string->data,
+ token->string->length - 1, 'L', '"');
case TOKEN_STREAMBEGIN:
sprintf(buffer, "<beginning of '%s'>", stream_name(token->pos.stream));
@@ -180,6 +220,47 @@ const char *show_token(const struct token *token)
}
}
+const char *quote_token(const struct token *token)
+{
+ static char buffer[256];
+
+ switch (token_type(token)) {
+ case TOKEN_ERROR:
+ return "syntax error";
+
+ case TOKEN_IDENT:
+ return show_ident(token->ident);
+
+ case TOKEN_NUMBER:
+ return token->number;
+
+ case TOKEN_SPECIAL:
+ return show_special(token->special);
+
+ case TOKEN_CHAR:
+ return quote_char(token->string->data,
+ token->string->length - 1, 0, '\'');
+ case TOKEN_CHAR+1 ... TOKEN_CHAR+4:
+ return quote_char(token->embedded,
+ token_type(token) - TOKEN_CHAR, 0, '\'');
+ case TOKEN_WIDE_CHAR:
+ return quote_char(token->string->data,
+ token->string->length - 1, 'L', '\'');
+ case TOKEN_WIDE_CHAR+1 ... TOKEN_WIDE_CHAR+4:
+ return quote_char(token->embedded,
+ token_type(token) - TOKEN_WIDE_CHAR, 'L', '\'');
+ case TOKEN_STRING:
+ return quote_char(token->string->data,
+ token->string->length - 1, 0, '"');
+ case TOKEN_WIDE_STRING:
+ return quote_char(token->string->data,
+ token->string->length - 1, 'L', '"');
+ default:
+ sprintf(buffer, "unhandled token type '%d' ", token_type(token));
+ return buffer;
+ }
+}
+
#define HASHED_INPUT_BITS (6)
#define HASHED_INPUT (1 << HASHED_INPUT_BITS)
#define HASH_PRIME 0x9e370001UL
@@ -241,10 +322,10 @@ static int nextchar_slow(stream_t *stream)
int offset = stream->offset;
int size = stream->size;
int c;
- int spliced = 0, had_cr, had_backslash, complain;
+ int spliced = 0, had_cr, had_backslash;
restart:
- had_cr = had_backslash = complain = 0;
+ had_cr = had_backslash = 0;
repeat:
if (offset >= size) {
@@ -258,48 +339,53 @@ repeat:
}
c = stream->buffer[offset++];
-
- if (had_cr && c != '\n')
- complain = 1;
+ if (had_cr)
+ goto check_lf;
if (c == '\r') {
had_cr = 1;
goto repeat;
}
- stream->pos += (c == '\t') ? (tabstop - stream->pos % tabstop) : 1;
-
- if (c == '\n') {
- stream->line++;
- stream->pos = 0;
- }
-
+norm:
if (!had_backslash) {
- if (c == '\\') {
+ switch (c) {
+ case '\t':
+ stream->pos += tabstop - stream->pos % tabstop;
+ break;
+ case '\n':
+ stream->line++;
+ stream->pos = 0;
+ stream->newline = 1;
+ break;
+ case '\\':
had_backslash = 1;
+ stream->pos++;
goto repeat;
+ default:
+ stream->pos++;
}
- if (c == '\n')
- stream->newline = 1;
} else {
if (c == '\n') {
- if (complain)
- warning(stream_pos(stream), "non-ASCII data stream");
+ stream->line++;
+ stream->pos = 0;
spliced = 1;
goto restart;
}
- stream->pos--;
offset--;
c = '\\';
}
-
out:
stream->offset = offset;
- if (complain)
- warning(stream_pos(stream), "non-ASCII data stream");
return c;
+check_lf:
+ if (c != '\n')
+ offset--;
+ c = '\n';
+ goto norm;
+
got_eof:
if (had_backslash) {
c = '\\';
@@ -307,8 +393,6 @@ got_eof:
}
if (stream->pos)
warning(stream_pos(stream), "no newline at end of file");
- else if (had_cr)
- warning(stream_pos(stream), "non-ASCII data stream");
else if (spliced)
warning(stream_pos(stream), "backslash-newline at end of file");
return EOF;
@@ -380,22 +464,36 @@ enum {
Exp = 8,
Dot = 16,
ValidSecond = 32,
+ Quote = 64,
+ Escape = 128,
};
static const long cclass[257] = {
- ['0' + 1 ... '9' + 1] = Digit | Hex,
+ ['0' + 1 ... '7' + 1] = Digit | Hex | Escape, /* \<octal> */
+ ['8' + 1 ... '9' + 1] = Digit | Hex,
['A' + 1 ... 'D' + 1] = Letter | Hex,
- ['E' + 1] = Letter | Hex | Exp,
+ ['E' + 1] = Letter | Hex | Exp, /* E<exp> */
['F' + 1] = Letter | Hex,
['G' + 1 ... 'O' + 1] = Letter,
- ['P' + 1] = Letter | Exp,
+ ['P' + 1] = Letter | Exp, /* P<exp> */
['Q' + 1 ... 'Z' + 1] = Letter,
- ['a' + 1 ... 'd' + 1] = Letter | Hex,
- ['e' + 1] = Letter | Hex | Exp,
- ['f' + 1] = Letter | Hex,
- ['g' + 1 ... 'o' + 1] = Letter,
- ['p' + 1] = Letter | Exp,
- ['q' + 1 ... 'z' + 1] = Letter,
+ ['a' + 1 ... 'b' + 1] = Letter | Hex | Escape, /* \a, \b */
+ ['c' + 1 ... 'd' + 1] = Letter | Hex,
+ ['e' + 1] = Letter | Hex | Exp | Escape,/* \e, e<exp> */
+ ['f' + 1] = Letter | Hex | Escape, /* \f */
+ ['g' + 1 ... 'm' + 1] = Letter,
+ ['n' + 1] = Letter | Escape, /* \n */
+ ['o' + 1] = Letter,
+ ['p' + 1] = Letter | Exp, /* p<exp> */
+ ['q' + 1] = Letter,
+ ['r' + 1] = Letter | Escape, /* \r */
+ ['s' + 1] = Letter,
+ ['t' + 1] = Letter | Escape, /* \t */
+ ['u' + 1] = Letter,
+ ['v' + 1] = Letter | Escape, /* \v */
+ ['w' + 1] = Letter,
+ ['x' + 1] = Letter | Escape, /* \x<hex> */
+ ['y' + 1 ... 'z' + 1] = Letter,
['_' + 1] = Letter,
['.' + 1] = Dot | ValidSecond,
['=' + 1] = ValidSecond,
@@ -406,6 +504,10 @@ static const long cclass[257] = {
['&' + 1] = ValidSecond,
['|' + 1] = ValidSecond,
['#' + 1] = ValidSecond,
+ ['\'' + 1] = Quote | Escape,
+ ['"' + 1] = Quote | Escape,
+ ['\\' + 1] = Escape,
+ ['?' + 1] = Escape,
};
/*
@@ -465,151 +567,74 @@ static int get_one_number(int c, int next, stream_t *stream)
return next;
}
-static int escapechar(int first, int type, stream_t *stream, int *valp)
-{
- int next, value;
-
- next = nextchar(stream);
- value = first;
-
- if (first == '\n')
- warning(stream_pos(stream), "Newline in string or character constant");
-
- if (first == '\\' && next != EOF) {
- value = next;
- next = nextchar(stream);
- if (value != type) {
- switch (value) {
- case 'a':
- value = '\a';
- break;
- case 'b':
- value = '\b';
- break;
- case 't':
- value = '\t';
- break;
- case 'n':
- value = '\n';
- break;
- case 'v':
- value = '\v';
- break;
- case 'f':
- value = '\f';
- break;
- case 'r':
- value = '\r';
- break;
- case 'e':
- value = '\e';
- break;
- case '\\':
- break;
- case '?':
- break;
- case '\'':
- break;
- case '"':
- break;
- case '\n':
- warning(stream_pos(stream), "Newline in string or character constant");
- break;
- case '0'...'7': {
- int nr = 2;
- value -= '0';
- while (next >= '0' && next <= '7') {
- value = (value << 3) + (next-'0');
- next = nextchar(stream);
- if (!--nr)
- break;
- }
- value &= 0xff;
- break;
- }
- case 'x': {
- int hex = hexval(next);
- if (hex < 16) {
- value = hex;
- next = nextchar(stream);
- while ((hex = hexval(next)) < 16) {
- value = (value << 4) + hex;
- next = nextchar(stream);
- }
- value &= 0xff;
- break;
- }
- }
- /* Fall through */
- default:
- warning(stream_pos(stream), "Unknown escape '%c'", value);
- }
- }
- /* Mark it as escaped */
- value |= 0x100;
- }
- *valp = value;
- return next;
-}
-
-static int get_char_token(int next, stream_t *stream, enum token_type type)
-{
- int value;
- struct token *token;
-
- next = escapechar(next, '\'', stream, &value);
- if (value == '\'' || next != '\'') {
- sparse_error(stream_pos(stream), "Bad character constant");
- drop_token(stream);
- return next;
- }
-
- token = stream->token;
- token_type(token) = type;
- token->character = value & 0xff;
-
- add_token(stream);
- return nextchar(stream);
-}
-
-static int get_string_token(int next, stream_t *stream, enum token_type type)
+static int eat_string(int next, stream_t *stream, enum token_type type)
{
static char buffer[MAX_STRING];
struct string *string;
- struct token *token;
+ struct token *token = stream->token;
int len = 0;
+ int escape;
+ int want_hex = 0;
+ char delim = type < TOKEN_STRING ? '\'' : '"';
- for (;;) {
- int val;
- next = escapechar(next, '"', stream, &val);
- if (val == '"')
- break;
+ for (escape = 0; escape || next != delim; next = nextchar(stream)) {
+ if (len < MAX_STRING)
+ buffer[len] = next;
+ len++;
+ if (next == '\n') {
+ warning(stream_pos(stream),
+ "Newline in string or character constant");
+ if (delim == '\'') /* assume it's lost ' */
+ break;
+ }
if (next == EOF) {
- warning(stream_pos(stream), "End of file in middle of string");
+ warning(stream_pos(stream),
+ "End of file in middle of string");
return next;
}
- if (len < MAX_STRING)
- buffer[len] = val;
- len++;
+ if (!escape) {
+ if (want_hex && !(cclass[next + 1] & Hex))
+ warning(stream_pos(stream),
+ "\\x used with no following hex digits");
+ want_hex = 0;
+ escape = next == '\\';
+ } else {
+ if (!(cclass[next + 1] & Escape))
+ warning(stream_pos(stream),
+ "Unknown escape '%c'", next);
+ escape = 0;
+ want_hex = next == 'x';
+ }
}
-
+ if (want_hex)
+ warning(stream_pos(stream),
+ "\\x used with no following hex digits");
if (len > MAX_STRING) {
warning(stream_pos(stream), "string too long (%d bytes, %d bytes max)", len, MAX_STRING);
len = MAX_STRING;
}
-
- string = __alloc_string(len+1);
- memcpy(string->data, buffer, len);
- string->data[len] = '\0';
- string->length = len+1;
+ if (delim == '\'' && len <= 4) {
+ if (len == 0) {
+ sparse_error(stream_pos(stream),
+ "empty character constant");
+ return nextchar(stream);
+ }
+ token_type(token) = type + len;
+ memset(buffer + len, '\0', 4 - len);
+ memcpy(token->embedded, buffer, 4);
+ } else {
+ token_type(token) = type;
+ string = __alloc_string(len+1);
+ memcpy(string->data, buffer, len);
+ string->data[len] = '\0';
+ string->length = len+1;
+ token->string = string;
+ }
/* Pass it on.. */
token = stream->token;
- token_type(token) = type;
- token->string = string;
add_token(stream);
-
- return next;
+ return nextchar(stream);
}
static int drop_stream_eoln(stream_t *stream)
@@ -725,9 +750,9 @@ static int get_one_special(int c, stream_t *stream)
return get_one_number(c, next, stream);
break;
case '"':
- return get_string_token(next, stream, TOKEN_STRING);
+ return eat_string(next, stream, TOKEN_STRING);
case '\'':
- return get_char_token(next, stream, TOKEN_CHAR);
+ return eat_string(next, stream, TOKEN_CHAR);
case '/':
if (next == '/')
return drop_stream_eoln(stream);
@@ -901,17 +926,19 @@ static int get_one_identifier(int c, stream_t *stream)
buf[len] = next;
len++;
};
+ if (cclass[next + 1] & Quote) {
+ if (len == 1 && buf[0] == 'L') {
+ if (next == '\'')
+ return eat_string(nextchar(stream), stream,
+ TOKEN_WIDE_CHAR);
+ else
+ return eat_string(nextchar(stream), stream,
+ TOKEN_WIDE_STRING);
+ }
+ }
hash = ident_hash_end(hash);
-
ident = create_hashed_ident(buf, len, hash);
- if (ident == &L_ident) {
- if (next == '\'')
- return get_char_token(nextchar(stream), stream, TOKEN_WIDE_CHAR);
- if (next == '\"')
- return get_string_token(nextchar(stream), stream, TOKEN_WIDE_STRING);
- }
-
/* Pass it on.. */
token = stream->token;
token_type(token) = TOKEN_IDENT;
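The tokenizer now keeps string/char bodies raw and only classifies the character following a backslash via the new Escape bit in cclass[]; a simplified standalone sketch of that check (the table here is a hand-written subset of the real cclass[] entries):

#include <stdio.h>
#include <string.h>

static int known_escape(char c)
{
	/* subset of the characters flagged Escape in cclass[] above */
	return strchr("abefnrtv01234567x\\'\"?", c) != NULL;
}

static void scan(const char *body)
{
	int escape = 0;

	for (; *body; body++) {
		if (escape) {
			if (!known_escape(*body))
				fprintf(stderr, "Unknown escape '%c'\n", *body);
			escape = 0;
		} else {
			escape = (*body == '\\');
		}
	}
}

int main(void)
{
	scan("a\\nb");	/* fine: \n is a known escape                 */
	scan("a\\cb");	/* warns: Unknown escape 'c', as in escapes.c */
	return 0;
}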
diff --git a/validation/__func__.c b/validation/__func__.c
new file mode 100644
index 00000000..65ce9282
--- /dev/null
+++ b/validation/__func__.c
@@ -0,0 +1,15 @@
+static void f(void)
+{
+ char *s1 = __func__;
+ char arr[2 * (sizeof __func__ == 2) - 1];
+ char *s2 = __func__ __func__;
+}
+/*
+ * check-name: __func__
+ * check-command: sparse -Wall $file
+ *
+ * check-error-start
+__func__.c:5:29: error: Expected ; at end of declaration
+__func__.c:5:29: error: got __func__
+ * check-error-end
+ */
diff --git a/validation/escapes.c b/validation/escapes.c
index 13f8f9c8..4a1b030e 100644
--- a/validation/escapes.c
+++ b/validation/escapes.c
@@ -8,14 +8,13 @@ static int bad_e[] = { '\c', '\0123', '\789', '\xdefg' };
* check-name: Character escape sequences
*
* check-error-start
-escapes.c:6:27: warning: Unknown escape 'c'
-escapes.c:6:35: error: Bad character constant
-escapes.c:6:38: error: Bad character constant
-escapes.c:6:42: error: Bad character constant
-escapes.c:6:46: error: Bad character constant
-escapes.c:6:53: error: Bad character constant
-escapes.c:6:56: error: Bad character constant
-escapes.c:6:42: error: Expected } at end of initializer
-escapes.c:6:42: error: got 89
+escapes.c:6:26: warning: Unknown escape 'c'
+escapes.c:3:34: warning: hex escape sequence out of range
+escapes.c:3:44: warning: hex escape sequence out of range
+escapes.c:4:18: warning: hex escape sequence out of range
+escapes.c:6:30: warning: multi-character character constant
+escapes.c:6:39: warning: multi-character character constant
+escapes.c:6:47: warning: hex escape sequence out of range
+escapes.c:6:47: warning: multi-character character constant
* check-error-end
*/
diff --git a/validation/foul-bitwise.c b/validation/foul-bitwise.c
index 9e21eab7..4b542cf9 100644
--- a/validation/foul-bitwise.c
+++ b/validation/foul-bitwise.c
@@ -24,7 +24,9 @@ static __le16 bar(__le16 a)
* check-error-start
foul-bitwise.c:9:16: warning: restricted __le16 degrades to integer
foul-bitwise.c:9:22: warning: restricted __le16 degrades to integer
-foul-bitwise.c:19:16: error: incompatible types for operation (-)
-foul-bitwise.c:19:16: argument has type restricted __le16 [usertype] a
+foul-bitwise.c:19:16: warning: restricted __le16 degrades to integer
+foul-bitwise.c:19:16: warning: incorrect type in return expression (different base types)
+foul-bitwise.c:19:16: expected restricted __le16
+foul-bitwise.c:19:16: got int
* check-error-end
*/
diff --git a/validation/preprocessor/preprocessor14.c b/validation/preprocessor/preprocessor14.c
index 05fc248b..027af040 100644
--- a/validation/preprocessor/preprocessor14.c
+++ b/validation/preprocessor/preprocessor14.c
@@ -7,7 +7,6 @@ A(,1)
B(,1)
/*
* check-name: Preprocessor #14
- * check-known-to-fail
* check-command: sparse -E $file
*
* check-output-start
diff --git a/validation/preprocessor/preprocessor23.c b/validation/preprocessor/preprocessor23.c
new file mode 100644
index 00000000..25be5085
--- /dev/null
+++ b/validation/preprocessor/preprocessor23.c
@@ -0,0 +1,47 @@
+#define H(x,...) ,##x##__VA_ARGS__##,##__VA_ARGS__
+H()
+H(x)
+H(,)
+H(x,)
+H(,x)
+H(x,x)
+#define I(x,...) ,##x##__VA_ARGS__
+I()
+I(x)
+I(,)
+I(x,)
+I(,x)
+I(x,x)
+/*
+ * check-name: Preprocessor #23
+ * check-command: sparse -E $file
+ *
+ * check-output-start
+
+,
+,x
+,,
+,x,
+,x,x
+,xx,x
+,x
+,
+,x
+,x
+,xx
+ * check-output-end
+ *
+ * check-error-start
+preprocessor/preprocessor23.c:3:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:4:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:5:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:5:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:6:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:6:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:7:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:7:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:10:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:12:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:14:1: error: '##' failed: concatenation is not a valid token
+ * check-error-end
+ */
diff --git a/validation/preprocessor/stringify.c b/validation/preprocessor/stringify.c
new file mode 100644
index 00000000..7fe965d5
--- /dev/null
+++ b/validation/preprocessor/stringify.c
@@ -0,0 +1,29 @@
+#define A(x) #x
+A('a')
+A("a")
+A(a)
+A(\n)
+A('\n')
+A("\n")
+A('"')
+A("a\nb")
+A(L"a\nb")
+A('\12')
+/*
+ * check-name: Preprocessor #14
+ * check-command: sparse -E $file
+ *
+ * check-output-start
+
+"'a'"
+"\"a\""
+"a"
+"\n"
+"'\\n'"
+"\"\\n\""
+"'\"'"
+"\"a\\nb\""
+"L\"a\\nb\""
+"'\\12'"
+ * check-output-end
+ */
diff --git a/validation/preprocessor/wide.c b/validation/preprocessor/wide.c
new file mode 100644
index 00000000..21b643ce
--- /dev/null
+++ b/validation/preprocessor/wide.c
@@ -0,0 +1,15 @@
+#define A(x) L##x
+A('a')
+A("bc")
+/*
+ * check-name: wide char token-pasting
+ * check-description: Used to cause infinite recursion.
+ * check-command: sparse -E $file
+ *
+ * check-output-start
+
+L'a'
+L"bc"
+ * check-output-end
+ */
+
diff --git a/validation/wide.c b/validation/wide.c
new file mode 100644
index 00000000..847a680f
--- /dev/null
+++ b/validation/wide.c
@@ -0,0 +1,9 @@
+static char c = L'\x41';
+static int n = 1/(0x41 - L'\x41');
+/*
+ * check-name: wide character constants
+ *
+ * check-error-start
+wide.c:2:17: warning: division by zero
+ * check-error-end
+ */