about | summary | refs | log | tree | commit | diff | stats | homepage
diff options
-rw-r--r-- Makefile 2
-rw-r--r-- char.c 131
-rw-r--r-- char.h 2
-rw-r--r-- evaluate.c 26
-rw-r--r-- expression.c 151
-rw-r--r-- lib.c 25
-rw-r--r-- lib.h 9
-rw-r--r-- pre-process.c 322
-rw-r--r-- token.h 7
-rw-r--r-- tokenize.c 393
-rw-r--r-- validation/__func__.c 15
-rw-r--r-- validation/escapes.c 17
-rw-r--r-- validation/foul-bitwise.c 6
-rw-r--r-- validation/preprocessor/preprocessor14.c 1
-rw-r--r-- validation/preprocessor/preprocessor23.c 47
-rw-r--r-- validation/preprocessor/stringify.c 29
-rw-r--r-- validation/preprocessor/wide.c 15
-rw-r--r-- validation/wide.c 9
18 files changed, 785 insertions, 422 deletions
diff --git a/Makefile b/Makefile
index 84e5df24..b195528e 100644
--- a/Makefile
+++ b/Makefile
@@ -93,7 +93,7 @@ LIB_H= token.h parse.h lib.h symbol.h scope.h expression.h target.h \
LIB_OBJS= target.o parse.o tokenize.o pre-process.o symbol.o lib.o scope.o \
expression.o show-parse.o evaluate.o expand.o inline.o linearize.o \
- sort.o allocate.o compat-$(OS).o ptrlist.o \
+ char.o sort.o allocate.o compat-$(OS).o ptrlist.o \
flow.o cse.o simplify.o memops.o liveness.o storage.o unssa.o dissect.o
LIB_FILE= libsparse.a
diff --git a/char.c b/char.c
new file mode 100644
index 00000000..92674565
--- /dev/null
+++ b/char.c
@@ -0,0 +1,131 @@
+#include <string.h>
+#include "target.h"
+#include "lib.h"
+#include "allocate.h"
+#include "token.h"
+#include "expression.h"
+
+/*
+ * Decode one source character, or one backslash escape, starting at p.
+ *
+ * p:    current position in the raw literal text
+ * val:  receives the decoded value
+ * end:  one past the last character that may be read by the \x and
+ *       octal digit loops
+ * bits: width of the target character type; the decoded escape value is
+ *       masked down to that many bits
+ * pos:  position used for out-of-range warnings
+ *
+ * Returns the position at which parsing should resume, i.e. the first
+ * character that is not part of what has just been decoded.
+ */
+static const char *parse_escape(const char *p, unsigned *val, const char *end, int bits, struct position pos)
+{
+	unsigned c = *p++;
+	unsigned d;
+	if (c != '\\') {
+		*val = c;
+		return p;
+	}
+
+	c = *p++;
+	switch (c) {
+	case 'a': c = '\a'; break;
+	case 'b': c = '\b'; break;
+	case 't': c = '\t'; break;
+	case 'n': c = '\n'; break;
+	case 'v': c = '\v'; break;
+	case 'f': c = '\f'; break;
+	case 'r': c = '\r'; break;
+	case 'e': c = '\e'; break;
+	case 'x': {
+		/* set bits of c that would be shifted out by the next digit */
+		unsigned mask = -(1U << (bits - 4));
+		for (c = 0; p < end; c = (c << 4) + d) {
+			/*
+			 * Look at the character before consuming it: the
+			 * one that terminates the escape belongs to the
+			 * caller and must not be skipped.
+			 */
+			d = hexval(*p);
+			if (d > 16)
+				break;
+			p++;
+			if (c & mask) {
+				warning(pos,
+					"hex escape sequence out of range");
+				mask = 0;	/* warn only once */
+			}
+		}
+		break;
+	}
+	case '0'...'7': {
+		/* at most two more octal digits after the first one */
+		if (p + 2 < end)
+			end = p + 2;
+		c -= '0';
+		/* as above, only step over characters that are digits */
+		while (p < end && (d = *p - '0') < 8) {
+			c = (c << 3) + d;
+			p++;
+		}
+		if ((c & 0400) && bits < 9)
+			warning(pos,
+				"octal escape sequence out of range");
+		break;
+	}
+	default:	/* everything else is left as is */
+		break;
+	}
+	/* keep the low 'bits' bits; shifting in two steps avoids a shift by 32 */
+	*val = c & ~((~0U << (bits - 1)) << 1);
+	return p;
+}
+
+/*
+ * Compute the value of a character constant token and store it in *val.
+ *
+ * The text of the constant lives either in token->string (token types
+ * TOKEN_CHAR and TOKEN_WIDE_CHAR) or in token->embedded, in which case
+ * the distance of the token type from TOKEN_CHAR / TOKEN_WIDE_CHAR gives
+ * the number of embedded characters.  Only the first character or escape
+ * is decoded; a warning is issued when that does not use up all of the
+ * text.
+ */
+void get_char_constant(struct token *token, unsigned long long *val)
+{
+	int type = token_type(token);
+	const char *start = token->embedded;
+	const char *limit, *rest;
+	unsigned value;
+
+	if (type == TOKEN_CHAR || type == TOKEN_WIDE_CHAR) {
+		start = token->string->data;
+		limit = start + token->string->length;
+	} else if (type > TOKEN_CHAR && type <= TOKEN_CHAR + 4) {
+		limit = start + (type - TOKEN_CHAR);
+	} else {
+		limit = start + (type - TOKEN_WIDE_CHAR);
+	}
+
+	rest = parse_escape(start, &value, limit,
+			    type < TOKEN_WIDE_CHAR ? bits_in_char : 32,
+			    token->pos);
+	if (rest != limit)
+		warning(token->pos,
+			"multi-character character constant");
+	*val = value;
+}
+
+/*
+ * Build the string constant for 'expr' out of the run of adjacent string
+ * literal tokens starting at 'token'.
+ *
+ * The literals are concatenated with their escapes decoded.  If any of
+ * them is a wide string the result is flagged as wide.  At most
+ * MAX_STRING characters are kept; a warning is issued when the
+ * concatenation is longer than that.
+ *
+ * The result is always stored in a freshly allocated struct string.  The
+ * strings attached to the tokens are left untouched: duplicated tokens
+ * share their struct string (dup_token copies the pointer), so decoding
+ * escapes in place would corrupt the text seen by the other copies.
+ *
+ * Returns the first token after the run of string literals.
+ */
+struct token *get_string_constant(struct token *token, struct expression *expr)
+{
+	struct string *string;
+	struct token *next = token->next, *done = NULL;
+	int is_wide = token_type(token) == TOKEN_WIDE_STRING;
+	static char buffer[MAX_STRING];
+	int len = 0;
+	int bits;
+
+	/* find the end of the run and see whether any part of it is wide */
+	while (!done) {
+		switch (token_type(next)) {
+		case TOKEN_WIDE_STRING:
+			is_wide = 1;
+			/* fall through */
+		case TOKEN_STRING:
+			next = next->next;
+			break;
+		default:
+			done = next;
+		}
+	}
+	bits = is_wide ? 32 : bits_in_char;
+	while (token != done) {
+		unsigned v;
+		const char *p = token->string->data;
+		/* length - 1: do not decode the final byte of the data */
+		const char *end = p + token->string->length - 1;
+		while (p < end) {
+			p = parse_escape(p, &v, end, bits, token->pos);
+			/* keep counting past the limit so the warning can report it */
+			if (len < MAX_STRING)
+				buffer[len] = v;
+			len++;
+		}
+		token = token->next;
+	}
+	if (len > MAX_STRING) {
+		warning(token->pos, "trying to concatenate %d-character string (%d bytes max)", len, MAX_STRING);
+		len = MAX_STRING;
+	}
+
+	string = __alloc_string(len+1);
+	string->length = len+1;
+	memcpy(string->data, buffer, len);
+	string->data[len] = '\0';
+	expr->string = string;
+	expr->wide = is_wide;
+	return token;
+}
diff --git a/char.h b/char.h
new file mode 100644
index 00000000..54be6b74
--- /dev/null
+++ b/char.h
@@ -0,0 +1,2 @@
+extern void get_char_constant(struct token *, unsigned long long *);
+extern struct token *get_string_constant(struct token *, struct expression *);
diff --git a/evaluate.c b/evaluate.c
index 0987a5e5..d09f271a 100644
--- a/evaluate.c
+++ b/evaluate.c
@@ -1696,16 +1696,20 @@ static struct symbol *evaluate_postop(struct expression *expr)
{
struct expression *op = expr->unop;
struct symbol *ctype = op->ctype;
- int class = classify_type(op->ctype, &ctype);
+ int class = classify_type(ctype, &ctype);
int multiply = 0;
+ if (!class || class & TYPE_COMPOUND) {
+ expression_error(expr, "need scalar for ++/--");
+ return NULL;
+ }
if (!lvalue_expression(expr->unop)) {
expression_error(expr, "need lvalue expression for ++/--");
return NULL;
}
if ((class & TYPE_RESTRICT) && restricted_unop(expr->op, &ctype))
- return bad_expr_type(expr);
+ unrestrict(expr, class, &ctype);
if (class & TYPE_NUM) {
multiply = 1;
@@ -1735,13 +1739,13 @@ static struct symbol *evaluate_sign(struct expression *expr)
/* should be an arithmetic type */
if (!(class & TYPE_NUM))
return bad_expr_type(expr);
- if (!(class & (TYPE_FLOAT|TYPE_RESTRICT))) {
- struct symbol *rtype = integer_promotion(ctype);
- expr->unop = cast_to(expr->unop, rtype);
- ctype = rtype;
- } else if ((class & TYPE_FLOAT) && expr->op != '~') {
- /* no conversions needed */
- } else if ((class & TYPE_RESTRICT) && !restricted_unop(expr->op, &ctype)) {
+ if (class & TYPE_RESTRICT)
+ goto Restr;
+Normal:
+ if (!(class & TYPE_FLOAT)) {
+ ctype = integer_promotion(ctype);
+ expr->unop = cast_to(expr->unop, ctype);
+ } else if (expr->op != '~') {
/* no conversions needed */
} else {
return bad_expr_type(expr);
@@ -1750,6 +1754,10 @@ static struct symbol *evaluate_sign(struct expression *expr)
*expr = *expr->unop;
expr->ctype = ctype;
return ctype;
+Restr:
+ if (restricted_unop(expr->op, &ctype))
+ unrestrict(expr, class, &ctype);
+ goto Normal;
}
static struct symbol *evaluate_preop(struct expression *expr)
diff --git a/expression.c b/expression.c
index 9f45c794..d2437c74 100644
--- a/expression.c
+++ b/expression.c
@@ -26,6 +26,7 @@
#include "scope.h"
#include "expression.h"
#include "target.h"
+#include "char.h"
static int match_oplist(int op, ...)
{
@@ -64,53 +65,50 @@ struct token *parens_expression(struct token *token, struct expression **expr, c
* Handle __func__, __FUNCTION__ and __PRETTY_FUNCTION__ token
* conversion
*/
-static int convert_one_fn_token(struct token *token)
+static struct symbol *handle_func(struct token *token)
{
- struct symbol *sym = current_fn;
-
- if (sym) {
- struct ident *ident = sym->ident;
- if (ident) {
- int len = ident->len;
- struct string *string;
-
- string = __alloc_string(len+1);
- memcpy(string->data, ident->name, len);
- string->data[len] = 0;
- string->length = len+1;
- token_type(token) = TOKEN_STRING;
- token->string = string;
- return 1;
- }
- }
- return 0;
-}
-
-static int convert_function(struct token *next)
-{
- int retval = 0;
- for (;;) {
- struct token *token = next;
- next = next->next;
- switch (token_type(token)) {
- case TOKEN_STRING:
- continue;
- case TOKEN_IDENT:
- if (token->ident == &__func___ident ||
- token->ident == &__FUNCTION___ident ||
- token->ident == &__PRETTY_FUNCTION___ident) {
- if (!convert_one_fn_token(token))
- break;
- retval = 1;
- continue;
- }
- /* Fall through */
- default:
- break;
- }
- break;
- }
- return retval;
+ struct ident *ident = token->ident;
+ struct symbol *decl, *array;
+ struct string *string;
+ int len;
+
+ if (ident != &__func___ident &&
+ ident != &__FUNCTION___ident &&
+ ident != &__PRETTY_FUNCTION___ident)
+ return NULL;
+
+ if (!current_fn)
+ return NULL;
+
+ /* OK, it's one of ours */
+ array = alloc_symbol(token->pos, SYM_ARRAY);
+ array->ctype.base_type = &char_ctype;
+ array->ctype.alignment = 1;
+ array->endpos = token->pos;
+ decl = alloc_symbol(token->pos, SYM_NODE);
+ decl->ctype.base_type = array;
+ decl->ctype.alignment = 1;
+ decl->ctype.modifiers = MOD_STATIC;
+ decl->endpos = token->pos;
+
+ /* function-scope, but in NS_SYMBOL */
+ bind_symbol(decl, ident, NS_LABEL);
+ decl->namespace = NS_SYMBOL;
+
+ len = current_fn->ident->len;
+ string = __alloc_string(len + 1);
+ memcpy(string->data, current_fn->ident->name, len);
+ string->data[len] = 0;
+ string->length = len + 1;
+
+ decl->initializer = alloc_expression(token->pos, EXPR_STRING);
+ decl->initializer->string = string;
+ decl->initializer->ctype = decl;
+ decl->array_size = alloc_const_expression(token->pos, len + 1);
+ array->array_size = decl->array_size;
+ decl->bit_size = array->bit_size = bytes_to_bits(len + 1);
+
+ return decl;
}
static struct token *parse_type(struct token *token, struct expression **tree)
@@ -220,50 +218,6 @@ static struct token *builtin_offsetof_expr(struct token *token,
}
}
-static struct token *string_expression(struct token *token, struct expression *expr)
-{
- struct string *string = token->string;
- struct token *next = token->next;
- int stringtype = token_type(token);
-
- convert_function(token);
-
- if (token_type(next) == stringtype) {
- int totlen = string->length-1;
- char *data;
-
- do {
- totlen += next->string->length-1;
- next = next->next;
- } while (token_type(next) == stringtype);
-
- if (totlen > MAX_STRING) {
- warning(token->pos, "trying to concatenate %d-character string (%d bytes max)", totlen, MAX_STRING);
- totlen = MAX_STRING;
- }
-
- string = __alloc_string(totlen+1);
- string->length = totlen+1;
- data = string->data;
- next = token;
- do {
- struct string *s = next->string;
- int len = s->length-1;
-
- if (len > totlen)
- len = totlen;
- totlen -= len;
-
- next = next->next;
- memcpy(data, s->data, len);
- data += len;
- } while (token_type(next) == stringtype);
- *data = '\0';
- }
- expr->string = string;
- return next;
-}
-
#ifndef ULLONG_MAX
#define ULLONG_MAX (~0ULL)
#endif
@@ -404,12 +358,11 @@ struct token *primary_expression(struct token *token, struct expression **tree)
struct expression *expr = NULL;
switch (token_type(token)) {
- case TOKEN_CHAR:
- case TOKEN_WIDE_CHAR:
+ case TOKEN_CHAR ... TOKEN_WIDE_CHAR + 4:
expr = alloc_expression(token->pos, EXPR_VALUE);
expr->flags = Int_const_expr;
- expr->ctype = token_type(token) == TOKEN_CHAR ? &int_ctype : &long_ctype;
- expr->value = (unsigned char) token->character;
+ expr->ctype = token_type(token) < TOKEN_WIDE_CHAR ? &int_ctype : &long_ctype;
+ get_char_constant(token, &expr->value);
token = token->next;
break;
@@ -434,8 +387,7 @@ struct token *primary_expression(struct token *token, struct expression **tree)
struct token *next = token->next;
if (!sym) {
- if (convert_function(token))
- goto handle_string;
+ sym = handle_func(token);
if (token->ident == &__builtin_types_compatible_p_ident) {
token = builtin_types_compatible_p_expr(token, &expr);
break;
@@ -473,13 +425,10 @@ struct token *primary_expression(struct token *token, struct expression **tree)
}
case TOKEN_STRING:
- case TOKEN_WIDE_STRING: {
- handle_string:
+ case TOKEN_WIDE_STRING:
expr = alloc_expression(token->pos, EXPR_STRING);
- expr->wide = token_type(token) == TOKEN_WIDE_STRING;
- token = string_expression(token, expr);
+ token = get_string_constant(token, expr);
break;
- }
case TOKEN_SPECIAL:
if (token->special == '(') {
diff --git a/lib.c b/lib.c
index bb814f2e..6bd10d36 100644
--- a/lib.c
+++ b/lib.c
@@ -234,8 +234,8 @@ int arch_m64 = ARCH_M64_DEFAULT;
int arch_msize_long = 0;
#define CMDLINE_INCLUDE 20
-int cmdline_include_nr = 0;
-struct cmdline_include cmdline_include[CMDLINE_INCLUDE];
+static int cmdline_include_nr = 0;
+static char *cmdline_include[CMDLINE_INCLUDE];
void add_pre_buffer(const char *fmt, ...)
@@ -308,16 +308,9 @@ static char **handle_switch_I(char *arg, char **next)
static void add_cmdline_include(char *filename)
{
- int fd = open(filename, O_RDONLY);
- if (fd < 0) {
- perror(filename);
- return;
- }
if (cmdline_include_nr >= CMDLINE_INCLUDE)
die("too many include files for %s\n", filename);
- cmdline_include[cmdline_include_nr].filename = filename;
- cmdline_include[cmdline_include_nr].fd = fd;
- cmdline_include_nr++;
+ cmdline_include[cmdline_include_nr++] = filename;
}
static char **handle_switch_i(char *arg, char **next)
@@ -930,19 +923,13 @@ static struct symbol_list *sparse_file(const char *filename)
*/
static struct symbol_list *sparse_initial(void)
{
- struct token *token;
int i;
// Prepend any "include" file to the stream.
// We're in global scope, it will affect all files!
- token = NULL;
- for (i = cmdline_include_nr - 1; i >= 0; i--)
- token = tokenize(cmdline_include[i].filename, cmdline_include[i].fd,
- token, includepath);
-
- // Prepend the initial built-in stream
- if (token)
- pre_buffer_end->next = token;
+ for (i = 0; i < cmdline_include_nr; i++)
+ add_pre_buffer("#argv_include \"%s\"\n", cmdline_include[i]);
+
return sparse_tokenstream(pre_buffer_begin);
}
diff --git a/lib.h b/lib.h
index 2cea2520..ee954fed 100644
--- a/lib.h
+++ b/lib.h
@@ -41,15 +41,6 @@ struct position {
noexpand:1;
};
-struct cmdline_include {
- char *filename;
- int fd;
-};
-
-extern struct cmdline_include cmdline_include[];
-extern int cmdline_include_nr;
-
-
struct ident;
struct token;
struct symbol;
diff --git a/pre-process.c b/pre-process.c
index 8a16f8b3..e5f56b40 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -82,8 +82,6 @@ static struct token *alloc_token(struct position *pos)
return token;
}
-static const char *show_token_sequence(struct token *token);
-
/* Expand symbol 'sym' at '*list' */
static int expand(struct token **, struct symbol *);
@@ -340,9 +338,35 @@ static struct token *dup_list(struct token *list)
return res;
}
+/*
+ * Render a token list as text using quote_token() for each token, with a
+ * single space wherever the following token carries the whitespace flag.
+ * The text is built in a static buffer; if it would not fit, an error is
+ * reported and what has been collected so far is returned.
+ */
+static const char *quote_token_sequence(struct token *token)
+{
+	static char buffer[1024];
+	char *const limit = buffer + sizeof(buffer);
+	char *out = buffer;
+	int need_space = 0;
+
+	for (; !eof_token(token); need_space = token->pos.whitespace) {
+		const char *text = quote_token(token);
+		int n = strlen(text);
+
+		if (out + need_space + n >= limit) {
+			sparse_error(token->pos, "too long token expansion");
+			break;
+		}
+
+		if (need_space)
+			*out++ = ' ';
+		memcpy(out, text, n);
+		out += n;
+		/* the loop step then reads the flag of the token we moved to */
+		token = token->next;
+	}
+	*out = 0;
+	return buffer;
+}
+
static struct token *stringify(struct token *arg)
{
- const char *s = show_token_sequence(arg);
+ const char *s = quote_token_sequence(arg);
int size = strlen(s)+1;
struct token *token = __alloc_token(0);
struct string *string = __alloc_string(size);
@@ -383,6 +407,8 @@ static void expand_arguments(int count, struct arg *args)
* Possibly valid combinations:
* - ident + ident -> ident
* - ident + number -> ident unless number contains '.', '+' or '-'.
+ * - 'L' + char constant -> wide char constant
+ * - 'L' + string literal -> wide string literal
* - number + number -> number
* - number + ident -> number
* - number + '.' -> number
@@ -398,6 +424,13 @@ static enum token_type combine(struct token *left, struct token *right, char *p)
if (t1 != TOKEN_IDENT && t1 != TOKEN_NUMBER && t1 != TOKEN_SPECIAL)
return TOKEN_ERROR;
+ if (t1 == TOKEN_IDENT && left->ident == &L_ident) {
+ if (t2 >= TOKEN_CHAR && t2 < TOKEN_WIDE_CHAR)
+ return t2 + TOKEN_WIDE_CHAR - TOKEN_CHAR;
+ if (t2 == TOKEN_STRING)
+ return TOKEN_WIDE_STRING;
+ }
+
if (t2 != TOKEN_IDENT && t2 != TOKEN_NUMBER && t2 != TOKEN_SPECIAL)
return TOKEN_ERROR;
@@ -440,9 +473,10 @@ static enum token_type combine(struct token *left, struct token *right, char *p)
static int merge(struct token *left, struct token *right)
{
static char buffer[512];
+ enum token_type res = combine(left, right, buffer);
int n;
- switch (combine(left, right, buffer)) {
+ switch (res) {
case TOKEN_IDENT:
left->ident = built_in_ident(buffer);
left->pos.noexpand = 0;
@@ -465,6 +499,21 @@ static int merge(struct token *left, struct token *right)
return 1;
}
}
+ break;
+
+ case TOKEN_WIDE_CHAR:
+ case TOKEN_WIDE_STRING:
+ token_type(left) = res;
+ left->pos.noexpand = 0;
+ left->string = right->string;
+ return 1;
+
+ case TOKEN_WIDE_CHAR + 1 ... TOKEN_WIDE_CHAR + 4:
+ token_type(left) = res;
+ left->pos.noexpand = 0;
+ memcpy(left->embedded, right->embedded, 4);
+ return 1;
+
default:
;
}
@@ -472,12 +521,12 @@ static int merge(struct token *left, struct token *right)
return 0;
}
-static struct token *dup_token(struct token *token, struct position *streampos, struct position *pos)
+static struct token *dup_token(struct token *token, struct position *streampos)
{
struct token *alloc = alloc_token(streampos);
token_type(alloc) = token_type(token);
- alloc->pos.newline = pos->newline;
- alloc->pos.whitespace = pos->whitespace;
+ alloc->pos.newline = token->pos.newline;
+ alloc->pos.whitespace = token->pos.whitespace;
alloc->number = token->number;
alloc->pos.noexpand = token->pos.noexpand;
return alloc;
@@ -489,7 +538,7 @@ static struct token **copy(struct token **where, struct token *list, int *count)
while (!eof_token(list)) {
struct token *token;
if (need_copy)
- token = dup_token(list, &list->pos, &list->pos);
+ token = dup_token(list, &list->pos);
else
token = list;
if (token_type(token) == TOKEN_IDENT && token->ident->tainted)
@@ -502,17 +551,37 @@ static struct token **copy(struct token **where, struct token *list, int *count)
return where;
}
+/*
+ * Decide what substitute() should do with a TOKEN_GNU_KLUDGE token, i.e.
+ * the comma of <comma>##<arg>[##<arg>...].
+ *
+ * *p is the kludge token; the code below expects (*p)->next to be the ##
+ * and (*p)->next->next to be the first argument token of the chain.
+ *
+ * Returns 1 if the whole chain is to be skipped; *p is then left on the
+ * last argument token of the chain.  Returns 0 if the comma has to be
+ * emitted; *p is then either moved to the first ## (so that it gets
+ * ignored) or left unchanged.
+ */
+static int handle_kludge(struct token **p, struct arg *args)
+{
+ struct token *t = (*p)->next->next;
+ while (1) {
+ struct arg *v = &args[t->argnum];
+ /* not followed by another ##: t is the last argument of the chain */
+ if (token_type(t->next) != TOKEN_CONCAT) {
+ if (v->arg) {
+ /* ignore the first ## */
+ *p = (*p)->next;
+ return 0;
+ }
+ /* skip the entire thing */
+ *p = t;
+ return 1;
+ }
+ /* an argument in the middle of the chain that is present and non-empty */
+ if (v->arg && !eof_token(v->arg))
+ return 0; /* no magic */
+ /* step over the ## to the next argument token */
+ t = t->next->next;
+ }
+}
+
static struct token **substitute(struct token **list, struct token *body, struct arg *args)
{
- struct token *token = *list;
- struct position *base_pos = &token->pos;
- struct position *pos = base_pos;
+ struct position *base_pos = &(*list)->pos;
int *count;
enum {Normal, Placeholder, Concat} state = Normal;
- for (; !eof_token(body); body = body->next, pos = &body->pos) {
+ for (; !eof_token(body); body = body->next) {
struct token *added, *arg;
struct token **tail;
+ struct token *t;
switch (token_type(body)) {
case TOKEN_GNU_KLUDGE:
@@ -520,13 +589,20 @@ static struct token **substitute(struct token **list, struct token *body, struct
* GNU kludge: if we had <comma>##<vararg>, behaviour
* depends on whether we had enough arguments to have
* a vararg. If we did, ## is just ignored. Otherwise
- * both , and ## are ignored. Comma should come from
- * the body of macro and not be an argument of earlier
- * concatenation.
+ * both , and ## are ignored. Worse, there can be
+ * an arbitrary number of ##<arg> in between; if all of
+ * those are empty, we act as if they hadn't been there,
+ * otherwise we act as if the kludge didn't exist.
*/
- if (!args[body->next->argnum].arg)
+ t = body;
+ if (handle_kludge(&body, args)) {
+ if (state == Concat)
+ state = Normal;
+ else
+ state = Placeholder;
continue;
- added = dup_token(body, base_pos, pos);
+ }
+ added = dup_token(t, base_pos);
token_type(added) = TOKEN_SPECIAL;
tail = &added->next;
break;
@@ -557,8 +633,8 @@ static struct token **substitute(struct token **list, struct token *body, struct
}
copy_arg:
tail = copy(&added, arg, count);
- added->pos.newline = pos->newline;
- added->pos.whitespace = pos->whitespace;
+ added->pos.newline = body->pos.newline;
+ added->pos.whitespace = body->pos.whitespace;
break;
case TOKEN_CONCAT:
@@ -569,14 +645,14 @@ static struct token **substitute(struct token **list, struct token *body, struct
continue;
case TOKEN_IDENT:
- added = dup_token(body, base_pos, pos);
+ added = dup_token(body, base_pos);
if (added->ident->tainted)
added->pos.noexpand = 1;
tail = &added->next;
break;
default:
- added = dup_token(body, base_pos, pos);
+ added = dup_token(body, base_pos);
tail = &added->next;
break;
}
@@ -625,6 +701,14 @@ static int expand(struct token **list, struct symbol *sym)
last = token->next;
tail = substitute(list, sym->expansion, args);
+ /*
+ * Note that it won't be eof - at least TOKEN_UNTAINT will be there.
+ * We still can lose the newline flag if the sucker expands to nothing,
+ * but the price of dealing with that is probably too high (we'd need
+ * to collect the flags during scan_next())
+ */
+ (*list)->pos.newline = token->pos.newline;
+ (*list)->pos.whitespace = token->pos.whitespace;
*tail = last;
return 0;
@@ -767,31 +851,6 @@ static int do_include_path(const char **pptr, struct token **list, struct token
return 0;
}
-static void do_include(int local, struct stream *stream, struct token **list, struct token *token, const char *filename, const char **path)
-{
- int flen = strlen(filename) + 1;
-
- /* Absolute path? */
- if (filename[0] == '/') {
- if (try_include("", filename, flen, list, includepath))
- return;
- goto out;
- }
-
- /* Dir of input file is first dir to search for quoted includes */
- set_stream_include_path(stream);
-
- if (!path)
- /* Do not search quote include if <> is in use */
- path = local ? quote_includepath : angle_includepath;
-
- /* Check the standard include paths.. */
- if (do_include_path(path, list, token, filename, flen))
- return;
-out:
- error_die(token->pos, "unable to open '%s'", filename);
-}
-
static int free_preprocessor_line(struct token *token)
{
while (token_type(token) != TOKEN_EOF) {
@@ -802,11 +861,13 @@ static int free_preprocessor_line(struct token *token)
return 1;
}
-static int handle_include_path(struct stream *stream, struct token **list, struct token *token, const char **path)
+static int handle_include_path(struct stream *stream, struct token **list, struct token *token, int how)
{
const char *filename;
struct token *next;
+ const char **path;
int expect;
+ int flen;
next = token->next;
expect = '>';
@@ -819,20 +880,52 @@ static int handle_include_path(struct stream *stream, struct token **list, struc
expect = '>';
}
}
+
token = next->next;
filename = token_name_sequence(token, expect, token);
- do_include(!expect, stream, list, token, filename, path);
- return 0;
+ flen = strlen(filename) + 1;
+
+ /* Absolute path? */
+ if (filename[0] == '/') {
+ if (try_include("", filename, flen, list, includepath))
+ return 0;
+ goto out;
+ }
+
+ switch (how) {
+ case 1:
+ path = stream->next_path;
+ break;
+ case 2:
+ includepath[0] = "";
+ path = includepath;
+ break;
+ default:
+ /* Dir of input file is first dir to search for quoted includes */
+ set_stream_include_path(stream);
+ path = expect ? angle_includepath : quote_includepath;
+ break;
+ }
+ /* Check the standard include paths.. */
+ if (do_include_path(path, list, token, filename, flen))
+ return 0;
+out:
+ error_die(token->pos, "unable to open '%s'", filename);
}
static int handle_include(struct stream *stream, struct token **list, struct token *token)
{
- return handle_include_path(stream, list, token, NULL);
+ return handle_include_path(stream, list, token, 0);
}
static int handle_include_next(struct stream *stream, struct token **list, struct token *token)
{
- return handle_include_path(stream, list, token, stream->next_path);
+ return handle_include_path(stream, list, token, 1);
+}
+
+/*
+ * Handler for the "argv_include" directive, which sparse_initial() puts
+ * into the pre-buffer for each include file given on the command line.
+ * Mode 2 makes handle_include_path() set includepath[0] to "" and search
+ * includepath.
+ */
+static int handle_argv_include(struct stream *stream, struct token **list, struct token *token)
+{
+ return handle_include_path(stream, list, token, 2);
}
static int token_different(struct token *t1, struct token *t2)
@@ -863,10 +956,12 @@ static int token_different(struct token *t1, struct token *t2)
case TOKEN_STR_ARGUMENT:
different = t1->argnum != t2->argnum;
break;
+ case TOKEN_CHAR + 1 ... TOKEN_CHAR + 4:
+ case TOKEN_WIDE_CHAR + 1 ... TOKEN_WIDE_CHAR + 4:
+ different = memcmp(t1->embedded, t2->embedded, 4);
+ break;
case TOKEN_CHAR:
case TOKEN_WIDE_CHAR:
- different = t1->character != t2->character;
- break;
case TOKEN_STRING:
case TOKEN_WIDE_STRING: {
struct string *s1, *s2;
@@ -1035,6 +1130,10 @@ static int try_arg(struct token *token, enum token_type type, struct token *argl
}
if (n)
return count->vararg ? 2 : 1;
+ /*
+ * XXX - need saner handling of that
+ * (>= 1024 instances of argument)
+ */
token_type(token) = TOKEN_ERROR;
return -1;
}
@@ -1042,49 +1141,103 @@ static int try_arg(struct token *token, enum token_type type, struct token *argl
return 0;
}
+/*
+ * Handle a '#' found in a macro body; *p is the link pointing at it.
+ *
+ * Without an argument list the '#' is only marked noexpand and returned.
+ * With one, the token after '#' must be accepted by try_arg() as a
+ * TOKEN_STR_ARGUMENT; it then takes over the whitespace flag of the '#',
+ * the '#' is freed and unlinked, and that token is returned.  Otherwise
+ * an error is reported and NULL is returned.
+ */
+static struct token *handle_hash(struct token **p, struct token *arglist)
+{
+	struct token *hash = *p;
+	struct token *param;
+
+	if (!arglist) {
+		hash->pos.noexpand = 1;
+		return hash;
+	}
+
+	param = hash->next;
+	if (!try_arg(param, TOKEN_STR_ARGUMENT, arglist)) {
+		sparse_error(hash->pos, "'#' is not followed by a macro parameter");
+		return NULL;
+	}
+
+	param->pos.whitespace = hash->pos.whitespace;
+	__free_token(hash);
+	*p = param;
+	return param;
+}
+
+/*
+ * Process a run of "x ## y [## z ...]" in a macro body.  On entry
+ * token->next is ##.
+ *
+ * Each operand is offered to try_arg() as a TOKEN_QUOTED_ARGUMENT, each
+ * ## is retyped to TOKEN_CONCAT, repeated ## are collapsed into one, and
+ * an operand that is '#' is first passed through handle_hash().
+ *
+ * Returns the last operand of the run, or NULL once an error has been
+ * reported (## with nothing after it, or handle_hash() failing).
+ */
+static struct token *handle_hashhash(struct token *token, struct token *arglist)
+{
+ struct token *last = token;
+ struct token *concat;
+ /* nonzero if the run starts with a comma */
+ int state = match_op(token, ',');
+
+ try_arg(token, TOKEN_QUOTED_ARGUMENT, arglist);
+
+ while (1) {
+ struct token *t;
+ int is_arg;
+
+ /* eat duplicate ## */
+ concat = token->next;
+ while (match_op(t = concat->next, SPECIAL_HASHHASH)) {
+ token->next = t;
+ __free_token(concat);
+ concat = t;
+ }
+ token_type(concat) = TOKEN_CONCAT;
+
+ /* t is now the operand on the right of the ## */
+ if (eof_token(t))
+ goto Econcat;
+
+ if (match_op(t, '#')) {
+ t = handle_hash(&concat->next, arglist);
+ if (!t)
+ return NULL;
+ }
+
+ is_arg = try_arg(t, TOKEN_QUOTED_ARGUMENT, arglist);
+
+ /*
+ * While 'last' is a comma followed only by arguments, keep it and
+ * record what try_arg() said about the newest one (2 is what it
+ * returns for a vararg).  Anything else restarts the tracking
+ * from the current operand.
+ */
+ if (state == 1 && is_arg) {
+ state = is_arg;
+ } else {
+ last = t;
+ state = match_op(t, ',');
+ }
+
+ token = t;
+ if (!match_op(token->next, SPECIAL_HASHHASH))
+ break;
+ }
+ /* handle GNU ,##__VA_ARGS__ kludge, in all its weirdness */
+ if (state == 2)
+ token_type(last) = TOKEN_GNU_KLUDGE;
+ return token;
+
+Econcat:
+ sparse_error(concat->pos, "'##' cannot appear at the ends of macro expansion");
+ return NULL;
+}
+
static struct token *parse_expansion(struct token *expansion, struct token *arglist, struct ident *name)
{
struct token *token = expansion;
struct token **p;
- struct token *last = NULL;
if (match_op(token, SPECIAL_HASHHASH))
goto Econcat;
for (p = &expansion; !eof_token(token); p = &token->next, token = *p) {
if (match_op(token, '#')) {
- if (arglist) {
- struct token *next = token->next;
- if (!try_arg(next, TOKEN_STR_ARGUMENT, arglist))
- goto Equote;
- next->pos.whitespace = token->pos.whitespace;
- token = *p = next;
- } else {
- token->pos.noexpand = 1;
- }
- } else if (match_op(token, SPECIAL_HASHHASH)) {
- struct token *next = token->next;
- int arg = try_arg(next, TOKEN_QUOTED_ARGUMENT, arglist);
- token_type(token) = TOKEN_CONCAT;
- if (arg) {
- token = next;
- /* GNU kludge */
- if (arg == 2 && last && match_op(last, ',')) {
- token_type(last) = TOKEN_GNU_KLUDGE;
- last->next = token;
- }
- } else if (match_op(next, SPECIAL_HASHHASH))
- token = next;
- else if (eof_token(next))
- goto Econcat;
- } else if (match_op(token->next, SPECIAL_HASHHASH)) {
- try_arg(token, TOKEN_QUOTED_ARGUMENT, arglist);
+ token = handle_hash(p, arglist);
+ if (!token)
+ return NULL;
+ }
+ if (match_op(token->next, SPECIAL_HASHHASH)) {
+ token = handle_hashhash(token, arglist);
+ if (!token)
+ return NULL;
} else {
try_arg(token, TOKEN_MACRO_ARGUMENT, arglist);
}
if (token_type(token) == TOKEN_ERROR)
goto Earg;
- last = token;
}
token = alloc_token(&expansion->pos);
token_type(token) = TOKEN_UNTAINT;
@@ -1093,10 +1246,6 @@ static struct token *parse_expansion(struct token *expansion, struct token *argl
*p = token;
return expansion;
-Equote:
- sparse_error(token->pos, "'#' is not followed by a macro parameter");
- return NULL;
-
Econcat:
sparse_error(token->pos, "'##' cannot appear at the ends of macro expansion");
return NULL;
@@ -1287,6 +1436,8 @@ static int handle_ifndef(struct stream *stream, struct token **line, struct toke
return preprocessor_if(stream, token, arg);
}
+static const char *show_token_sequence(struct token *token);
+
/*
* Expression handling for #if and #elif; it differs from normal expansion
* due to special treatment of "defined".
@@ -1709,6 +1860,7 @@ static void init_preprocessor(void)
{ "add_system", handle_add_system },
{ "add_dirafter", handle_add_dirafter },
{ "split_include", handle_split_include },
+ { "argv_include", handle_argv_include },
}, special[] = {
{ "ifdef", handle_ifdef },
{ "ifndef", handle_ifndef },
diff --git a/token.h b/token.h
index cd292331..20c23268 100644
--- a/token.h
+++ b/token.h
@@ -68,8 +68,8 @@ enum token_type {
TOKEN_ZERO_IDENT,
TOKEN_NUMBER,
TOKEN_CHAR,
- TOKEN_WIDE_CHAR,
- TOKEN_STRING,
+ TOKEN_WIDE_CHAR = TOKEN_CHAR + 5,
+ TOKEN_STRING = TOKEN_WIDE_CHAR + 5,
TOKEN_WIDE_STRING,
TOKEN_SPECIAL,
TOKEN_STREAMBEGIN,
@@ -165,9 +165,9 @@ struct token {
struct ident *ident;
unsigned int special;
struct string *string;
- int character;
int argnum;
struct argcount count;
+ char embedded[4];
};
};
@@ -198,6 +198,7 @@ extern const char *show_special(int);
extern const char *show_ident(const struct ident *);
extern const char *show_string(const struct string *string);
extern const char *show_token(const struct token *);
+extern const char *quote_token(const struct token *);
extern struct token * tokenize(const char *, int, struct token *, const char **next_path);
extern struct token * tokenize_buffer(void *, unsigned long, struct token **);
diff --git a/tokenize.c b/tokenize.c
index d4f05e56..95f308e0 100644
--- a/tokenize.c
+++ b/tokenize.c
@@ -121,6 +121,42 @@ const char *show_string(const struct string *string)
return buffer;
}
+/*
+ * Format the len bytes at s as a character or string literal: optional
+ * prefix character, delimiter, the bytes copied verbatim, delimiter.
+ * The result is built in a static buffer that the next call overwrites.
+ */
+static const char *show_char(const char *s, size_t len, char prefix, char delim)
+{
+	static char buffer[MAX_STRING + 4];
+	size_t n = 0;
+
+	if (prefix)
+		buffer[n++] = prefix;
+	buffer[n++] = delim;
+	memcpy(buffer + n, s, len);
+	n += len;
+	buffer[n++] = delim;
+	buffer[n] = '\0';
+	return buffer;
+}
+
+/*
+ * Like show_char(), but with the result escaped so that it can itself be
+ * placed inside a string literal: every '"' and '\\' in the text gets a
+ * backslash in front of it, and so does the delimiter when it is '"'.
+ * The result is built in a static buffer that the next call overwrites.
+ */
+static const char *quote_char(const char *s, size_t len, char prefix, char delim)
+{
+	static char buffer[2*MAX_STRING + 6];
+	const int is_string = (delim == '"');
+	const char *src, *stop = s + len;
+	char *out = buffer;
+
+	if (prefix)
+		*out++ = prefix;
+	if (is_string)
+		*out++ = '\\';
+	*out++ = delim;
+
+	for (src = s; src != stop; src++) {
+		switch (*src) {
+		case '"':
+		case '\\':
+			*out++ = '\\';
+			/* fall through */
+		default:
+			*out++ = *src;
+		}
+	}
+
+	if (is_string)
+		*out++ = '\\';
+	*out++ = delim;
+	*out++ = '\0';
+	return buffer;
+}
+
const char *show_token(const struct token *token)
{
static char buffer[256];
@@ -137,10 +173,6 @@ const char *show_token(const struct token *token)
case TOKEN_IDENT:
return show_ident(token->ident);
- case TOKEN_STRING:
- case TOKEN_WIDE_STRING:
- return show_string(token->string);
-
case TOKEN_NUMBER:
return token->number;
@@ -148,15 +180,23 @@ const char *show_token(const struct token *token)
return show_special(token->special);
case TOKEN_CHAR:
- case TOKEN_WIDE_CHAR: {
- char *ptr = buffer;
- int c = token->character;
- *ptr++ = '\'';
- ptr = charstr(ptr, c, '\'', 0);
- *ptr++ = '\'';
- *ptr++ = '\0';
- return buffer;
- }
+ return show_char(token->string->data,
+ token->string->length - 1, 0, '\'');
+ case TOKEN_CHAR+1 ... TOKEN_CHAR+4:
+ return show_char(token->embedded,
+ token_type(token) - TOKEN_CHAR, 0, '\'');
+ case TOKEN_WIDE_CHAR:
+ return show_char(token->string->data,
+ token->string->length - 1, 'L', '\'');
+ case TOKEN_WIDE_CHAR+1 ... TOKEN_WIDE_CHAR+4:
+ return show_char(token->embedded,
+ token_type(token) - TOKEN_WIDE_CHAR, 'L', '\'');
+ case TOKEN_STRING:
+ return show_char(token->string->data,
+ token->string->length - 1, 0, '"');
+ case TOKEN_WIDE_STRING:
+ return show_char(token->string->data,
+ token->string->length - 1, 'L', '"');
case TOKEN_STREAMBEGIN:
sprintf(buffer, "<beginning of '%s'>", stream_name(token->pos.stream));
@@ -180,6 +220,47 @@ const char *show_token(const struct token *token)
}
}
+const char *quote_token(const struct token *token)
+{
+ static char buffer[256];
+
+ switch (token_type(token)) {
+ case TOKEN_ERROR:
+ return "syntax error";
+
+ case TOKEN_IDENT:
+ return show_ident(token->ident);
+
+ case TOKEN_NUMBER:
+ return token->number;
+
+ case TOKEN_SPECIAL:
+ return show_special(token->special);
+
+ case TOKEN_CHAR:
+ return quote_char(token->string->data,
+ token->string->length - 1, 0, '\'');
+ case TOKEN_CHAR+1 ... TOKEN_CHAR+4:
+ return quote_char(token->embedded,
+ token_type(token) - TOKEN_CHAR, 0, '\'');
+ case TOKEN_WIDE_CHAR:
+ return quote_char(token->string->data,
+ token->string->length - 1, 'L', '\'');
+ case TOKEN_WIDE_CHAR+1 ... TOKEN_WIDE_CHAR+4:
+ return quote_char(token->embedded,
+ token_type(token) - TOKEN_WIDE_CHAR, 'L', '\'');
+ case TOKEN_STRING:
+ return quote_char(token->string->data,
+ token->string->length - 1, 0, '"');
+ case TOKEN_WIDE_STRING:
+ return quote_char(token->string->data,
+ token->string->length - 1, 'L', '"');
+ default:
+ sprintf(buffer, "unhandled token type '%d' ", token_type(token));
+ return buffer;
+ }
+}
+
#define HASHED_INPUT_BITS (6)
#define HASHED_INPUT (1 << HASHED_INPUT_BITS)
#define HASH_PRIME 0x9e370001UL
@@ -241,10 +322,10 @@ static int nextchar_slow(stream_t *stream)
int offset = stream->offset;
int size = stream->size;
int c;
- int spliced = 0, had_cr, had_backslash, complain;
+ int spliced = 0, had_cr, had_backslash;
restart:
- had_cr = had_backslash = complain = 0;
+ had_cr = had_backslash = 0;
repeat:
if (offset >= size) {
@@ -258,48 +339,53 @@ repeat:
}
c = stream->buffer[offset++];
-
- if (had_cr && c != '\n')
- complain = 1;
+ if (had_cr)
+ goto check_lf;
if (c == '\r') {
had_cr = 1;
goto repeat;
}
- stream->pos += (c == '\t') ? (tabstop - stream->pos % tabstop) : 1;
-
- if (c == '\n') {
- stream->line++;
- stream->pos = 0;
- }
-
+norm:
if (!had_backslash) {
- if (c == '\\') {
+ switch (c) {
+ case '\t':
+ stream->pos += tabstop - stream->pos % tabstop;
+ break;
+ case '\n':
+ stream->line++;
+ stream->pos = 0;
+ stream->newline = 1;
+ break;
+ case '\\':
had_backslash = 1;
+ stream->pos++;
goto repeat;
+ default:
+ stream->pos++;
}
- if (c == '\n')
- stream->newline = 1;
} else {
if (c == '\n') {
- if (complain)
- warning(stream_pos(stream), "non-ASCII data stream");
+ stream->line++;
+ stream->pos = 0;
spliced = 1;
goto restart;
}
- stream->pos--;
offset--;
c = '\\';
}
-
out:
stream->offset = offset;
- if (complain)
- warning(stream_pos(stream), "non-ASCII data stream");
return c;
+check_lf:
+ if (c != '\n')
+ offset--;
+ c = '\n';
+ goto norm;
+
got_eof:
if (had_backslash) {
c = '\\';
@@ -307,8 +393,6 @@ got_eof:
}
if (stream->pos)
warning(stream_pos(stream), "no newline at end of file");
- else if (had_cr)
- warning(stream_pos(stream), "non-ASCII data stream");
else if (spliced)
warning(stream_pos(stream), "backslash-newline at end of file");
return EOF;
@@ -380,22 +464,36 @@ enum {
Exp = 8,
Dot = 16,
ValidSecond = 32,
+ Quote = 64,
+ Escape = 128,
};
static const long cclass[257] = {
- ['0' + 1 ... '9' + 1] = Digit | Hex,
+ ['0' + 1 ... '7' + 1] = Digit | Hex | Escape, /* \<octal> */
+ ['8' + 1 ... '9' + 1] = Digit | Hex,
['A' + 1 ... 'D' + 1] = Letter | Hex,
- ['E' + 1] = Letter | Hex | Exp,
+ ['E' + 1] = Letter | Hex | Exp, /* E<exp> */
['F' + 1] = Letter | Hex,
['G' + 1 ... 'O' + 1] = Letter,
- ['P' + 1] = Letter | Exp,
+ ['P' + 1] = Letter | Exp, /* P<exp> */
['Q' + 1 ... 'Z' + 1] = Letter,
- ['a' + 1 ... 'd' + 1] = Letter | Hex,
- ['e' + 1] = Letter | Hex | Exp,
- ['f' + 1] = Letter | Hex,
- ['g' + 1 ... 'o' + 1] = Letter,
- ['p' + 1] = Letter | Exp,
- ['q' + 1 ... 'z' + 1] = Letter,
+ ['a' + 1 ... 'b' + 1] = Letter | Hex | Escape, /* \a, \b */
+ ['c' + 1 ... 'd' + 1] = Letter | Hex,
+ ['e' + 1] = Letter | Hex | Exp | Escape,/* \e, e<exp> */
+ ['f' + 1] = Letter | Hex | Escape, /* \f */
+ ['g' + 1 ... 'm' + 1] = Letter,
+ ['n' + 1] = Letter | Escape, /* \n */
+ ['o' + 1] = Letter,
+ ['p' + 1] = Letter | Exp, /* p<exp> */
+ ['q' + 1] = Letter,
+ ['r' + 1] = Letter | Escape, /* \r */
+ ['s' + 1] = Letter,
+ ['t' + 1] = Letter | Escape, /* \t */
+ ['u' + 1] = Letter,
+ ['v' + 1] = Letter | Escape, /* \v */
+ ['w' + 1] = Letter,
+ ['x' + 1] = Letter | Escape, /* \x<hex> */
+ ['y' + 1 ... 'z' + 1] = Letter,
['_' + 1] = Letter,
['.' + 1] = Dot | ValidSecond,
['=' + 1] = ValidSecond,
@@ -406,6 +504,10 @@ static const long cclass[257] = {
['&' + 1] = ValidSecond,
['|' + 1] = ValidSecond,
['#' + 1] = ValidSecond,
+ ['\'' + 1] = Quote | Escape,
+ ['"' + 1] = Quote | Escape,
+ ['\\' + 1] = Escape,
+ ['?' + 1] = Escape,
};
/*
@@ -465,151 +567,74 @@ static int get_one_number(int c, int next, stream_t *stream)
return next;
}
-static int escapechar(int first, int type, stream_t *stream, int *valp)
-{
- int next, value;
-
- next = nextchar(stream);
- value = first;
-
- if (first == '\n')
- warning(stream_pos(stream), "Newline in string or character constant");
-
- if (first == '\\' && next != EOF) {
- value = next;
- next = nextchar(stream);
- if (value != type) {
- switch (value) {
- case 'a':
- value = '\a';
- break;
- case 'b':
- value = '\b';
- break;
- case 't':
- value = '\t';
- break;
- case 'n':
- value = '\n';
- break;
- case 'v':
- value = '\v';
- break;
- case 'f':
- value = '\f';
- break;
- case 'r':
- value = '\r';
- break;
- case 'e':
- value = '\e';
- break;
- case '\\':
- break;
- case '?':
- break;
- case '\'':
- break;
- case '"':
- break;
- case '\n':
- warning(stream_pos(stream), "Newline in string or character constant");
- break;
- case '0'...'7': {
- int nr = 2;
- value -= '0';
- while (next >= '0' && next <= '7') {
- value = (value << 3) + (next-'0');
- next = nextchar(stream);
- if (!--nr)
- break;
- }
- value &= 0xff;
- break;
- }
- case 'x': {
- int hex = hexval(next);
- if (hex < 16) {
- value = hex;
- next = nextchar(stream);
- while ((hex = hexval(next)) < 16) {
- value = (value << 4) + hex;
- next = nextchar(stream);
- }
- value &= 0xff;
- break;
- }
- }
- /* Fall through */
- default:
- warning(stream_pos(stream), "Unknown escape '%c'", value);
- }
- }
- /* Mark it as escaped */
- value |= 0x100;
- }
- *valp = value;
- return next;
-}
-
-static int get_char_token(int next, stream_t *stream, enum token_type type)
-{
- int value;
- struct token *token;
-
- next = escapechar(next, '\'', stream, &value);
- if (value == '\'' || next != '\'') {
- sparse_error(stream_pos(stream), "Bad character constant");
- drop_token(stream);
- return next;
- }
-
- token = stream->token;
- token_type(token) = type;
- token->character = value & 0xff;
-
- add_token(stream);
- return nextchar(stream);
-}
-
-static int get_string_token(int next, stream_t *stream, enum token_type type)
+static int eat_string(int next, stream_t *stream, enum token_type type)
{
static char buffer[MAX_STRING];
struct string *string;
- struct token *token;
+ struct token *token = stream->token;
int len = 0;
+ int escape;
+ int want_hex = 0;
+ char delim = type < TOKEN_STRING ? '\'' : '"';
- for (;;) {
- int val;
- next = escapechar(next, '"', stream, &val);
- if (val == '"')
- break;
+ for (escape = 0; escape || next != delim; next = nextchar(stream)) {
+ if (len < MAX_STRING)
+ buffer[len] = next;
+ len++;
+ if (next == '\n') {
+ warning(stream_pos(stream),
+ "Newline in string or character constant");
+ if (delim == '\'') /* assume it's lost ' */
+ break;
+ }
if (next == EOF) {
- warning(stream_pos(stream), "End of file in middle of string");
+ warning(stream_pos(stream),
+ "End of file in middle of string");
return next;
}
- if (len < MAX_STRING)
- buffer[len] = val;
- len++;
+ if (!escape) {
+ if (want_hex && !(cclass[next + 1] & Hex))
+ warning(stream_pos(stream),
+ "\\x used with no following hex digits");
+ want_hex = 0;
+ escape = next == '\\';
+ } else {
+ if (!(cclass[next + 1] & Escape))
+ warning(stream_pos(stream),
+ "Unknown escape '%c'", next);
+ escape = 0;
+ want_hex = next == 'x';
+ }
}
-
+ if (want_hex)
+ warning(stream_pos(stream),
+ "\\x used with no following hex digits");
if (len > MAX_STRING) {
warning(stream_pos(stream), "string too long (%d bytes, %d bytes max)", len, MAX_STRING);
len = MAX_STRING;
}
-
- string = __alloc_string(len+1);
- memcpy(string->data, buffer, len);
- string->data[len] = '\0';
- string->length = len+1;
+ if (delim == '\'' && len <= 4) {
+ if (len == 0) {
+ sparse_error(stream_pos(stream),
+ "empty character constant");
+ return nextchar(stream);
+ }
+ token_type(token) = type + len;
+ memset(buffer + len, '\0', 4 - len);
+ memcpy(token->embedded, buffer, 4);
+ } else {
+ token_type(token) = type;
+ string = __alloc_string(len+1);
+ memcpy(string->data, buffer, len);
+ string->data[len] = '\0';
+ string->length = len+1;
+ token->string = string;
+ }
/* Pass it on.. */
token = stream->token;
- token_type(token) = type;
- token->string = string;
add_token(stream);
-
- return next;
+ return nextchar(stream);
}
static int drop_stream_eoln(stream_t *stream)
@@ -725,9 +750,9 @@ static int get_one_special(int c, stream_t *stream)
return get_one_number(c, next, stream);
break;
case '"':
- return get_string_token(next, stream, TOKEN_STRING);
+ return eat_string(next, stream, TOKEN_STRING);
case '\'':
- return get_char_token(next, stream, TOKEN_CHAR);
+ return eat_string(next, stream, TOKEN_CHAR);
case '/':
if (next == '/')
return drop_stream_eoln(stream);
@@ -901,17 +926,19 @@ static int get_one_identifier(int c, stream_t *stream)
buf[len] = next;
len++;
};
+ if (cclass[next + 1] & Quote) {
+ if (len == 1 && buf[0] == 'L') {
+ if (next == '\'')
+ return eat_string(nextchar(stream), stream,
+ TOKEN_WIDE_CHAR);
+ else
+ return eat_string(nextchar(stream), stream,
+ TOKEN_WIDE_STRING);
+ }
+ }
hash = ident_hash_end(hash);
-
ident = create_hashed_ident(buf, len, hash);
- if (ident == &L_ident) {
- if (next == '\'')
- return get_char_token(nextchar(stream), stream, TOKEN_WIDE_CHAR);
- if (next == '\"')
- return get_string_token(nextchar(stream), stream, TOKEN_WIDE_STRING);
- }
-
/* Pass it on.. */
token = stream->token;
token_type(token) = TOKEN_IDENT;
diff --git a/validation/__func__.c b/validation/__func__.c
new file mode 100644
index 00000000..65ce9282
--- /dev/null
+++ b/validation/__func__.c
@@ -0,0 +1,15 @@
+static void f(void)
+{
+ char *s1 = __func__;
+ char arr[2 * (sizeof __func__ == 2) - 1];
+ char *s2 = __func__ __func__;
+}
+/*
+ * check-name: __func__
+ * check-command: sparse -Wall $file
+ *
+ * check-error-start
+__func__.c:5:29: error: Expected ; at end of declaration
+__func__.c:5:29: error: got __func__
+ * check-error-end
+ */
diff --git a/validation/escapes.c b/validation/escapes.c
index 13f8f9c8..4a1b030e 100644
--- a/validation/escapes.c
+++ b/validation/escapes.c
@@ -8,14 +8,13 @@ static int bad_e[] = { '\c', '\0123', '\789', '\xdefg' };
* check-name: Character escape sequences
*
* check-error-start
-escapes.c:6:27: warning: Unknown escape 'c'
-escapes.c:6:35: error: Bad character constant
-escapes.c:6:38: error: Bad character constant
-escapes.c:6:42: error: Bad character constant
-escapes.c:6:46: error: Bad character constant
-escapes.c:6:53: error: Bad character constant
-escapes.c:6:56: error: Bad character constant
-escapes.c:6:42: error: Expected } at end of initializer
-escapes.c:6:42: error: got 89
+escapes.c:6:26: warning: Unknown escape 'c'
+escapes.c:3:34: warning: hex escape sequence out of range
+escapes.c:3:44: warning: hex escape sequence out of range
+escapes.c:4:18: warning: hex escape sequence out of range
+escapes.c:6:30: warning: multi-character character constant
+escapes.c:6:39: warning: multi-character character constant
+escapes.c:6:47: warning: hex escape sequence out of range
+escapes.c:6:47: warning: multi-character character constant
* check-error-end
*/
diff --git a/validation/foul-bitwise.c b/validation/foul-bitwise.c
index 9e21eab7..4b542cf9 100644
--- a/validation/foul-bitwise.c
+++ b/validation/foul-bitwise.c
@@ -24,7 +24,9 @@ static __le16 bar(__le16 a)
* check-error-start
foul-bitwise.c:9:16: warning: restricted __le16 degrades to integer
foul-bitwise.c:9:22: warning: restricted __le16 degrades to integer
-foul-bitwise.c:19:16: error: incompatible types for operation (-)
-foul-bitwise.c:19:16: argument has type restricted __le16 [usertype] a
+foul-bitwise.c:19:16: warning: restricted __le16 degrades to integer
+foul-bitwise.c:19:16: warning: incorrect type in return expression (different base types)
+foul-bitwise.c:19:16: expected restricted __le16
+foul-bitwise.c:19:16: got int
* check-error-end
*/
diff --git a/validation/preprocessor/preprocessor14.c b/validation/preprocessor/preprocessor14.c
index 05fc248b..027af040 100644
--- a/validation/preprocessor/preprocessor14.c
+++ b/validation/preprocessor/preprocessor14.c
@@ -7,7 +7,6 @@ A(,1)
B(,1)
/*
* check-name: Preprocessor #14
- * check-known-to-fail
* check-command: sparse -E $file
*
* check-output-start
diff --git a/validation/preprocessor/preprocessor23.c b/validation/preprocessor/preprocessor23.c
new file mode 100644
index 00000000..25be5085
--- /dev/null
+++ b/validation/preprocessor/preprocessor23.c
@@ -0,0 +1,47 @@
+#define H(x,...) ,##x##__VA_ARGS__##,##__VA_ARGS__
+H()
+H(x)
+H(,)
+H(x,)
+H(,x)
+H(x,x)
+#define I(x,...) ,##x##__VA_ARGS__
+I()
+I(x)
+I(,)
+I(x,)
+I(,x)
+I(x,x)
+/*
+ * check-name: Preprocessor #23
+ * check-command: sparse -E $file
+ *
+ * check-output-start
+
+,
+,x
+,,
+,x,
+,x,x
+,xx,x
+,x
+,
+,x
+,x
+,xx
+ * check-output-end
+ *
+ * check-error-start
+preprocessor/preprocessor23.c:3:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:4:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:5:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:5:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:6:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:6:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:7:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:7:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:10:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:12:1: error: '##' failed: concatenation is not a valid token
+preprocessor/preprocessor23.c:14:1: error: '##' failed: concatenation is not a valid token
+ * check-error-end
+ */
diff --git a/validation/preprocessor/stringify.c b/validation/preprocessor/stringify.c
new file mode 100644
index 00000000..7fe965d5
--- /dev/null
+++ b/validation/preprocessor/stringify.c
@@ -0,0 +1,29 @@
+#define A(x) #x
+A('a')
+A("a")
+A(a)
+A(\n)
+A('\n')
+A("\n")
+A('"')
+A("a\nb")
+A(L"a\nb")
+A('\12')
+/*
+ * check-name: Preprocessor stringification
+ * check-command: sparse -E $file
+ *
+ * check-output-start
+
+"'a'"
+"\"a\""
+"a"
+"\n"
+"'\\n'"
+"\"\\n\""
+"'\"'"
+"\"a\\nb\""
+"L\"a\\nb\""
+"'\\12'"
+ * check-output-end
+ */
diff --git a/validation/preprocessor/wide.c b/validation/preprocessor/wide.c
new file mode 100644
index 00000000..21b643ce
--- /dev/null
+++ b/validation/preprocessor/wide.c
@@ -0,0 +1,15 @@
+#define A(x) L##x
+A('a')
+A("bc")
+/*
+ * check-name: wide char token-pasting
+ * check-description: Used to cause infinite recursion.
+ * check-command: sparse -E $file
+ *
+ * check-output-start
+
+L'a'
+L"bc"
+ * check-output-end
+ */
+
diff --git a/validation/wide.c b/validation/wide.c
new file mode 100644
index 00000000..847a680f
--- /dev/null
+++ b/validation/wide.c
@@ -0,0 +1,9 @@
+static char c = L'\x41';
+static int n = 1/(0x41 - L'\x41');
+/*
+ * check-name: wide character constants
+ *
+ * check-error-start
+wide.c:2:17: warning: division by zero
+ * check-error-end
+ */