diff options
| author | Christopher Li <sparse@chrisli.org> | 2013-02-12 23:01:45 -0800 |
|---|---|---|
| committer | Christopher Li <sparse@chrisli.org> | 2013-02-13 14:55:26 -0800 |
| commit | 1b8e012d10d2a5af2d4935e4a47df9c527399219 (patch) | |
| tree | eb5c93ce49e6bc718f6da55fdb59c85df7a7f011 /expression.c | |
| parent | 6558e30ec635e26e767cee027936a0d0cae79bcb (diff) | |
| parent | 3dbed8ac24a2b4b24bc9776d89ea5328f1424a63 (diff) | |
| download | sparse-dev-1b8e012d10d2a5af2d4935e4a47df9c527399219.tar.gz | |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/viro/sparse into marge
Pull preprocessor fix from Al Viro.
1) we really, really shouldn't convert escape sequences too early;
#define A(x) #x
A('\12')
should yield "'\\12'", *not* "'\\n'".
2) literal merging handles all sequences of string/wide string
literals; result is wide if any of them is wide. string_expression() is
handling that wrong - "ab"L"c" is L"abc"
3) with support (no matter how cursory) of wide char constants
and wide string literals, we really ought to handle
#define A(x,y) x##y
A(L,'a')
properly; it's not that tricky - combine() needs to recognize <IDENT["L"],CHAR>
and <IDENT["L"],STRING> pairs.
4) '\777' is an error, while L'\777' is valid - the value should fit
into unsigned char or unsigned counterpart of wchar_t. Note that for string
literals this happens *after* phase 6 - what matters is the type of literal
after joining the adjacent ones (see (2) above).
5) stringifying should only quote \ and " in character constants and
string literals,
#define A(x) #x
A(\n)
should produce "\n", not "\\n"
6) we are losing L when stringifying wide string literals; that's
wrong.
I've patches hopefully fixing the above. Basically, I delay interpreting
escape sequences (past the bare minimum needed to find where the token ends)
until we are handling an expression with character constant or string literal
in it.
For character constants I'm keeping the token body in token->embedded -
4-character array replacing token->character. That covers practically
all realistic instances; character constant *may* be longer than that,
but it has to be something like '\x000000000000000000000000041' - sure,
that's 100% legitimate C and it's going to be the same as '\x41' on
everything, but when was the last time you've seen something like that?
So I've split TOKEN_CHAR into 5 values - TOKEN_CHAR+1--TOKEN_CHAR+4 meaning
1--4 characters kept in ->embedded[], TOKEN_CHAR itself used for extremely
rare cases longer than that (token->string holds the body in that case).
TOKEN_WIDE_CHAR got the same treatment.
AFAICS, with those fixes we get the same behaviour as in gcc, except for
invalid escape sequences in character constants and string literals: those are
silently ignored by cpp if the string/char constant doesn't make it
out of preprocessor. sparse still warns about those. The situation
with this one is frustrating; on one hand C99 is saying that e.g.
'\x' is not a token. Moreover, in a footnote in 6.4.4.4 it flat-out
requires diagnostics for such. On the other hand... footnotes are
informative-only and having "other character" token match ' would
put us in nasal daemon country, so gcc is free to do whatever it feels
like doing. I think we shouldn't play that kind of standard-lawyering
*and* sparse has always warned on that, so I've left that warning
in place.
Note that real wchar_t handling is still not there; at the very least,
we need to decide what type will be used for that sucker (for gcc it's
int on all targets we care about), fix the handling of wide string literals
in initializers and evaluate_string() and stop dropping upper bits in
get_string_constant(). That would probably mean not using struct string
for wide ones, as well... Hell knows; I don't want to touch that right
now. If anything, I'd rather wait until we get to C11 support - they've
got much saner variants of wide strings there (char16_t/char32_t with
u and U as token prefix as L is used for wchar_t; there's also u8"..." for
UTF8 strings).
Diffstat (limited to 'expression.c')
| -rw-r--r-- | expression.c | 151 |
1 files changed, 50 insertions, 101 deletions
diff --git a/expression.c b/expression.c index 9f45c794..d2437c74 100644 --- a/expression.c +++ b/expression.c @@ -26,6 +26,7 @@ #include "scope.h" #include "expression.h" #include "target.h" +#include "char.h" static int match_oplist(int op, ...) { @@ -64,53 +65,50 @@ struct token *parens_expression(struct token *token, struct expression **expr, c * Handle __func__, __FUNCTION__ and __PRETTY_FUNCTION__ token * conversion */ -static int convert_one_fn_token(struct token *token) +static struct symbol *handle_func(struct token *token) { - struct symbol *sym = current_fn; - - if (sym) { - struct ident *ident = sym->ident; - if (ident) { - int len = ident->len; - struct string *string; - - string = __alloc_string(len+1); - memcpy(string->data, ident->name, len); - string->data[len] = 0; - string->length = len+1; - token_type(token) = TOKEN_STRING; - token->string = string; - return 1; - } - } - return 0; -} - -static int convert_function(struct token *next) -{ - int retval = 0; - for (;;) { - struct token *token = next; - next = next->next; - switch (token_type(token)) { - case TOKEN_STRING: - continue; - case TOKEN_IDENT: - if (token->ident == &__func___ident || - token->ident == &__FUNCTION___ident || - token->ident == &__PRETTY_FUNCTION___ident) { - if (!convert_one_fn_token(token)) - break; - retval = 1; - continue; - } - /* Fall through */ - default: - break; - } - break; - } - return retval; + struct ident *ident = token->ident; + struct symbol *decl, *array; + struct string *string; + int len; + + if (ident != &__func___ident && + ident != &__FUNCTION___ident && + ident != &__PRETTY_FUNCTION___ident) + return NULL; + + if (!current_fn) + return NULL; + + /* OK, it's one of ours */ + array = alloc_symbol(token->pos, SYM_ARRAY); + array->ctype.base_type = &char_ctype; + array->ctype.alignment = 1; + array->endpos = token->pos; + decl = alloc_symbol(token->pos, SYM_NODE); + decl->ctype.base_type = array; + decl->ctype.alignment = 1; + decl->ctype.modifiers = 
MOD_STATIC; + decl->endpos = token->pos; + + /* function-scope, but in NS_SYMBOL */ + bind_symbol(decl, ident, NS_LABEL); + decl->namespace = NS_SYMBOL; + + len = current_fn->ident->len; + string = __alloc_string(len + 1); + memcpy(string->data, current_fn->ident->name, len); + string->data[len] = 0; + string->length = len + 1; + + decl->initializer = alloc_expression(token->pos, EXPR_STRING); + decl->initializer->string = string; + decl->initializer->ctype = decl; + decl->array_size = alloc_const_expression(token->pos, len + 1); + array->array_size = decl->array_size; + decl->bit_size = array->bit_size = bytes_to_bits(len + 1); + + return decl; } static struct token *parse_type(struct token *token, struct expression **tree) @@ -220,50 +218,6 @@ static struct token *builtin_offsetof_expr(struct token *token, } } -static struct token *string_expression(struct token *token, struct expression *expr) -{ - struct string *string = token->string; - struct token *next = token->next; - int stringtype = token_type(token); - - convert_function(token); - - if (token_type(next) == stringtype) { - int totlen = string->length-1; - char *data; - - do { - totlen += next->string->length-1; - next = next->next; - } while (token_type(next) == stringtype); - - if (totlen > MAX_STRING) { - warning(token->pos, "trying to concatenate %d-character string (%d bytes max)", totlen, MAX_STRING); - totlen = MAX_STRING; - } - - string = __alloc_string(totlen+1); - string->length = totlen+1; - data = string->data; - next = token; - do { - struct string *s = next->string; - int len = s->length-1; - - if (len > totlen) - len = totlen; - totlen -= len; - - next = next->next; - memcpy(data, s->data, len); - data += len; - } while (token_type(next) == stringtype); - *data = '\0'; - } - expr->string = string; - return next; -} - #ifndef ULLONG_MAX #define ULLONG_MAX (~0ULL) #endif @@ -404,12 +358,11 @@ struct token *primary_expression(struct token *token, struct expression **tree) struct expression 
*expr = NULL; switch (token_type(token)) { - case TOKEN_CHAR: - case TOKEN_WIDE_CHAR: + case TOKEN_CHAR ... TOKEN_WIDE_CHAR + 4: expr = alloc_expression(token->pos, EXPR_VALUE); expr->flags = Int_const_expr; - expr->ctype = token_type(token) == TOKEN_CHAR ? &int_ctype : &long_ctype; - expr->value = (unsigned char) token->character; + expr->ctype = token_type(token) < TOKEN_WIDE_CHAR ? &int_ctype : &long_ctype; + get_char_constant(token, &expr->value); token = token->next; break; @@ -434,8 +387,7 @@ struct token *primary_expression(struct token *token, struct expression **tree) struct token *next = token->next; if (!sym) { - if (convert_function(token)) - goto handle_string; + sym = handle_func(token); if (token->ident == &__builtin_types_compatible_p_ident) { token = builtin_types_compatible_p_expr(token, &expr); break; @@ -473,13 +425,10 @@ struct token *primary_expression(struct token *token, struct expression **tree) } case TOKEN_STRING: - case TOKEN_WIDE_STRING: { - handle_string: + case TOKEN_WIDE_STRING: expr = alloc_expression(token->pos, EXPR_STRING); - expr->wide = token_type(token) == TOKEN_WIDE_STRING; - token = string_expression(token, expr); + token = get_string_constant(token, expr); break; - } case TOKEN_SPECIAL: if (token->special == '(') { |
