diff options
| -rw-r--r-- | Documentation/IR.md | 356 | ||||
| -rw-r--r-- | Documentation/IR.rst | 411 | ||||
| -rw-r--r-- | Documentation/api.rst | 1 | ||||
| -rw-r--r-- | Documentation/conf.py | 1 | ||||
| -rw-r--r-- | Documentation/doc-guide.rst | 4 | ||||
| -rw-r--r-- | Documentation/index.rst | 2 | ||||
| -rwxr-xr-x | Documentation/sphinx/ir.py | 75 | ||||
| -rw-r--r-- | Makefile | 1 | ||||
| -rw-r--r-- | builtin.c | 5 | ||||
| -rw-r--r-- | lib.c | 176 | ||||
| -rw-r--r-- | lib.h | 2 | ||||
| -rw-r--r-- | pre-process.c | 115 | ||||
| -rw-r--r-- | tokenize.c | 9 | ||||
| -rw-r--r-- | utils.c | 17 | ||||
| -rw-r--r-- | utils.h | 25 | ||||
| -rw-r--r-- | validation/preprocessor/builtin.c | 17 |
16 files changed, 722 insertions, 495 deletions
diff --git a/Documentation/IR.md b/Documentation/IR.md deleted file mode 100644 index 03fa3f80..00000000 --- a/Documentation/IR.md +++ /dev/null @@ -1,356 +0,0 @@ -# Sparse's Intermediate Representation - - -## Instruction -This document briefly describes which field of struct instruction is -used by which operation. - -Some of those fields are used by almost all instructions, -some others are specific to only one or a few instructions. -The common ones are: -- .src1, .src2, .src3: (pseudo_t) operands of binops or ternary ops. -- .src: (pseudo_t) operand of unary ops (alias for .src1). -- .target: (pseudo_t) result of unary, binary & ternary ops, is sometimes used - otherwise by some others instructions. -- .cond: (pseudo_t) input operands for condition (alias .src/.src1) -- .type: (symbol*) usually the type of .result, sometimes of the operands - -### Terminators -#### OP_RET -Return from subroutine. -- .src : returned value (NULL if void) -- .type: type of .src - -#### OP_BR -Unconditional branch -- .bb_true: destination basic block - -#### OP_CBR -Conditional branch -- .cond: condition -- .type: type of .cond, must be an integral type -- .bb_true, .bb_false: destination basic blocks - -#### OP_SWITCH -Switch / multi-branch -- .cond: condition -- .type: type of .cond, must be an integral type -- .multijmp_list: pairs of case-value - destination basic block - -#### OP_COMPUTEDGOTO -Computed goto / branch to register -- .src: address to branch to (void*) -- .multijmp_list: list of possible destination basic blocks - -### Arithmetic binops -They all follow the same signature: -- .src1, .src1: operands (types must be compatible with .target) -- .target: result of the operation (must be an integral type) -- .type: type of .target - -#### OP_ADD -Integer addition. - -#### OP_SUB -Integer subtraction. - -#### OP_MUL -Integer multiplication. - -#### OP_DIVU -Integer unsigned division. - -#### OP_DIVS -Integer signed division. - -#### OP_MODU -Integer unsigned remainder. 
- -#### OP_MODS -Integer signed remainder. - -#### OP_SHL -Shift left (integer only) - -#### OP_LSR -Logical Shift right (integer only) - -#### OP_ASR -Arithmetic Shift right (integer only) - -### Floating-point binops -They all follow the same signature: -- .src1, .src1: operands (types must be compatible with .target) -- .target: result of the operation (must be a floating-point type) -- .type: type of .target - -#### OP_FADD -Floating-point addition. - -#### OP_FSUB -Floating-point subtraction. - -#### OP_FMUL -Floating-point multiplication. - -#### OP_FDIV -Floating-point division. - -### Logical ops -They all follow the same signature: -- .src1, .src2: operands (types must be compatible with .target) -- .target: result of the operation -- .type: type of .target, must be an integral type - -#### OP_AND -#### OP_OR -#### OP_XOR - -### Boolean ops -#### OP_AND_BOOL -#### OP_OR_BOOL - -### Integer compares -They all have the following signature: -- .src1, .src2: operands (types must be compatible) -- .target: result of the operation (0/1 valued integer) -- .type: type of .target, must be an integral type - -#### OP_SET_EQ -Compare equal. - -#### OP_SET_NE -Compare not-equal. - -#### OP_SET_LE -Compare less-than-or-equal (signed). - -#### OP_SET_GE -Compare greater-than-or-equal (signed). - -#### OP_SET_LT -Compare less-than (signed). - -#### OP_SET_GT -Compare greater-than (signed). - -#### OP_SET_B -Compare less-than (unsigned). - -#### OP_SET_A -Compare greater-than (unsigned). - -#### OP_SET_BE -Compare less-than-or-equal (unsigned). - -#### OP_SET_AE -Compare greater-than-or-equal (unsigned). - -### Floating-point compares -They all have the same signature as the integer compares. -The usual 6 operations exist in two versions: 'ordered' and -'unordered'. 
Theses operations first check if any operand is a -NaN and if it is the case the ordered compares return false -and then unordered return true, otherwise the result of the -comparison, now garanted to be done on non-NaNs, is returned. - -#### OP_FCMP_OEQ -Floating-point compare ordered equal - -#### OP_FCMP_ONE -Floating-point compare ordered not-equal - -#### OP_FCMP_OLE -Floating-point compare ordered less-than-or-equal - -#### OP_FCMP_OGE -Floating-point compare ordered greater-or-equal - -#### OP_FCMP_OLT -Floating-point compare ordered less-than - -#### OP_FCMP_OGT -Floating-point compare ordered greater-than - - -#### OP_FCMP_UEQ -Floating-point compare unordered equal - -#### OP_FCMP_UNE -Floating-point compare unordered not-equal - -#### OP_FCMP_ULE -Floating-point compare unordered less-than-or-equal - -#### OP_FCMP_UGE -Floating-point compare unordered greater-or-equal - -#### OP_FCMP_ULT -Floating-point compare unordered less-than - -#### OP_FCMP_UGT -Floating-point compare unordered greater-than - - -#### OP_FCMP_ORD -Floating-point compare ordered: return true if both operands are ordered -(none of the operands are a NaN) and false otherwise. - -#### OP_FCMP_UNO -Floating-point compare unordered: return false if no operands is ordered -and true otherwise. - -### Unary ops -#### OP_NOT -Logical not. -- .src: operand (type must be compatible with .target) -- .target: result of the operation -- .type: type of .target, must be an integral type - -#### OP_NEG -Integer negation. -- .src: operand (type must be compatible with .target) -- .target: result of the operation (must be an integral type) -- .type: type of .target - -#### OP_FNEG -Floating-point negation. -- .src: operand (type must be compatible with .target) -- .target: result of the operation (must be a floating-point type) -- .type: type of .target - -#### OP_COPY -Copy (only needed after out-of-SSA). 
-- .src: operand (type must be compatible with .target) -- .target: result of the operation -- .type: type of .target - -### Type conversions -They all have the following signature: -- .src: source value -- .orig_type: type of .src -- .target: result value -- .type: type of .target - -#### OP_CAST -Cast to unsigned integer (and to void pointer). - -#### OP_SCAST -Cast to signed integer. - -#### OP_FPCAST -Cast to floating-point. - -#### OP_PTRCAST -Cast to pointer. - -### Ternary ops -#### OP_SEL -- .src1: condition, must be of integral type -- .src2, .src3: operands (types must be compatible with .target) -- .target: result of the operation -- .type: type of .target - -#### OP_RANGE -Range/bounds checking (only used for an unused sparse extension). -- .src1: value to be checked -- .src2, src3: bound of the value (must be constants?) -- .type: type of .src[123]? - -### Memory ops -#### OP_LOAD -Load. -- .src: base address to load from -- .offset: address offset -- .target: loaded value -- .type: type of .target - -#### OP_STORE -Store. -- .src: base address to store to -- .offset: address offset -- .target: value to be stored -- .type: type of .target - -### Others -#### OP_SYMADDR -Create a pseudo corresponding to the address of a symbol. -- .symbol: (pseudo_t) input symbol (alias .src) -- .target: symbol's address - -#### OP_SETFVAL -Create a pseudo corresponding to a floating-point literal. -- .fvalue: the literal's value (long double) -- .target: the corresponding pseudo -- .type: type of the literal & .target - -#### OP_SETVAL -Create a pseudo corresponding to a string literal or a label-as-value. -The value is given as an expression EXPR_STRING or EXPR_LABEL. -- .val: (expression) input expression -- .target: the resulting value -- .type: type of .target, the value - -#### OP_PHI -Phi-node (for SSA form). -- .phi_list: phi-operands (type must be compatible with .target) -- .target: "result" -- .type: type of .target - -#### OP_PHISOURCE -Phi-node source. 
-Like OP_COPY but exclusively used to give a defining instructions -(and thus also a type) to *all* OP_PHI operands. -- .phi_src: operand (type must be compatible with .target, alias .src) -- .target: the "result" PSEUDO_PHI -- .type: type of .target -- .phi_users: list of phi instructions using the target pseudo - -#### OP_CALL -Function call. -- .func: (pseudo_t) the function (can be a symbol or a "register", alias .src)) -- .arguments: (pseudo_list) list of the associated arguments -- .target: function return value (if any) -- .type: type of .target -- .fntypes: (symbol_list) list of the function's types: the first enrty is the full function type, the next ones are the type of each arguments - -#### OP_INLINED_CALL -Only used as an annotation to show that the instructions just above -correspond to a function that have been inlined. -- .func: (pseudo_t) the function (must be a symbol, alias .src)) -- .arguments: list of pseudos that where the function's arguments -- .target: function return value (if any) -- .type: type of .target - -#### OP_SLICE -Extract a "slice" from an aggregate. -- .base: (pseudo_t) aggregate (alias .src) -- .from, .len: offet & size of the "slice" within the aggregate -- .target: result -- .type: type of .target - -#### OP_ASM -Inlined assembly code. -- .string: asm template -- .asm_rules: asm constraints, rules - -### Sparse tagging (line numbers, context, whatever) -#### OP_CONTEXT -Currently only used for lock/unlock tracking. -- .context_expr: unused -- .increment: (1 for locking, -1 for unlocking) -- .check: (ignore the instruction if 0) - -### Misc ops -#### OP_ENTRY -Function entry point (no associated semantic). - -#### OP_BADOP -Invalid operation (should never be generated). - -#### OP_NOP -No-op (should never be generated). - -#### OP_DEATHNOTE -Annotation telling the pseudo will be death after the next -instruction (other than some other annotation, that is). 
diff --git a/Documentation/IR.rst b/Documentation/IR.rst new file mode 100644 index 00000000..67ef06a5 --- /dev/null +++ b/Documentation/IR.rst @@ -0,0 +1,411 @@ +.. default-domain:: ir + +Sparse's Intermediate Representation +==================================== + +Instructions +~~~~~~~~~~~~ + +This document briefly describes which field of struct instruction is +used by which operation. + +Some of those fields are used by almost all instructions, +some others are specific to only one or a few instructions. +The common ones are: + +* .src1, .src2, .src3: (pseudo_t) operands of binops or ternary ops. +* .src: (pseudo_t) operand of unary ops (alias for .src1). +* .target: (pseudo_t) result of unary, binary & ternary ops, is + sometimes used otherwise by some other instructions. +* .cond: (pseudo_t) input operands for condition (alias .src/.src1) +* .type: (symbol*) usually the type of .target, sometimes of the operands + +Terminators +----------- +.. op:: OP_RET + Return from subroutine. + + * .src : returned value (NULL if void) + * .type: type of .src + +.. op:: OP_BR + Unconditional branch + + * .bb_true: destination basic block + +.. op:: OP_CBR + Conditional branch + + * .cond: condition + * .type: type of .cond, must be an integral type + * .bb_true, .bb_false: destination basic blocks + +.. op:: OP_SWITCH + Switch / multi-branch + + * .cond: condition + * .type: type of .cond, must be an integral type + * .multijmp_list: pairs of case-value - destination basic block + +.. op:: OP_COMPUTEDGOTO + Computed goto / branch to register + + * .src: address to branch to (void*) + * .multijmp_list: list of possible destination basic blocks + +Arithmetic binops +----------------- +They all follow the same signature: + * .src1, .src2: operands (types must be compatible with .target) + * .target: result of the operation (must be an integral type) + * .type: type of .target + +.. op:: OP_ADD + Integer addition. + +.. op:: OP_SUB + Integer subtraction. + +.. 
op:: OP_MUL + Integer multiplication. + +.. op:: OP_DIVU + Integer unsigned division. + +.. op:: OP_DIVS + Integer signed division. + +.. op:: OP_MODU + Integer unsigned remainder. + +.. op:: OP_MODS + Integer signed remainder. + +.. op:: OP_SHL + Shift left (integer only) + +.. op:: OP_LSR + Logical Shift right (integer only) + +.. op:: OP_ASR + Arithmetic Shift right (integer only) + +Floating-point binops +--------------------- +They all follow the same signature: + * .src1, .src2: operands (types must be compatible with .target) + * .target: result of the operation (must be a floating-point type) + * .type: type of .target + +.. op:: OP_FADD + Floating-point addition. + +.. op:: OP_FSUB + Floating-point subtraction. + +.. op:: OP_FMUL + Floating-point multiplication. + +.. op:: OP_FDIV + Floating-point division. + +Logical ops +----------- +They all follow the same signature: + * .src1, .src2: operands (types must be compatible with .target) + * .target: result of the operation + * .type: type of .target, must be an integral type + +.. op:: OP_AND + Logical AND + +.. op:: OP_OR + Logical OR + +.. op:: OP_XOR + Logical XOR + +Boolean ops +----------- +.. op:: OP_AND_BOOL + Boolean AND + +.. op:: OP_OR_BOOL + Boolean OR + +Integer compares +---------------- +They all have the following signature: + * .src1, .src2: operands (types must be compatible) + * .target: result of the operation (0/1 valued integer) + * .type: type of .target, must be an integral type + +.. op:: OP_SET_EQ + Compare equal. + +.. op:: OP_SET_NE + Compare not-equal. + +.. op:: OP_SET_LE + Compare less-than-or-equal (signed). + +.. op:: OP_SET_GE + Compare greater-than-or-equal (signed). + +.. op:: OP_SET_LT + Compare less-than (signed). + +.. op:: OP_SET_GT + Compare greater-than (signed). + +.. op:: OP_SET_B + Compare less-than (unsigned). + +.. op:: OP_SET_A + Compare greater-than (unsigned). + +.. op:: OP_SET_BE + Compare less-than-or-equal (unsigned). + +.. 
op:: OP_SET_AE + Compare greater-than-or-equal (unsigned). + +Floating-point compares +----------------------- +They all have the same signature as the integer compares. + +The usual 6 operations exist in two versions: 'ordered' and +'unordered'. These operations first check if any operand is a +NaN and if it is the case the ordered compares return false +and the unordered ones return true, otherwise the result of the +comparison, now guaranteed to be done on non-NaNs, is returned. + +.. op:: OP_FCMP_OEQ + Floating-point compare ordered equal + +.. op:: OP_FCMP_ONE + Floating-point compare ordered not-equal + +.. op:: OP_FCMP_OLE + Floating-point compare ordered less-than-or-equal + +.. op:: OP_FCMP_OGE + Floating-point compare ordered greater-or-equal + +.. op:: OP_FCMP_OLT + Floating-point compare ordered less-than + +.. op:: OP_FCMP_OGT + Floating-point compare ordered greater-than + + +.. op:: OP_FCMP_UEQ + Floating-point compare unordered equal + +.. op:: OP_FCMP_UNE + Floating-point compare unordered not-equal + +.. op:: OP_FCMP_ULE + Floating-point compare unordered less-than-or-equal + +.. op:: OP_FCMP_UGE + Floating-point compare unordered greater-or-equal + +.. op:: OP_FCMP_ULT + Floating-point compare unordered less-than + +.. op:: OP_FCMP_UGT + Floating-point compare unordered greater-than + + +.. op:: OP_FCMP_ORD + Floating-point compare ordered: return true if both operands are ordered + (none of the operands are a NaN) and false otherwise. + +.. op:: OP_FCMP_UNO + Floating-point compare unordered: return true if at least one operand + is unordered (is a NaN) and false otherwise. + +Unary ops +--------- +.. op:: OP_NOT + Logical not. + + * .src: operand (type must be compatible with .target) + * .target: result of the operation + * .type: type of .target, must be an integral type + +.. op:: OP_NEG + Integer negation. + + * .src: operand (type must be compatible with .target) + * .target: result of the operation (must be an integral type) + * .type: type of .target + +.. 
op:: OP_FNEG + Floating-point negation. + + * .src: operand (type must be compatible with .target) + * .target: result of the operation (must be a floating-point type) + * .type: type of .target + +.. op:: OP_COPY + Copy (only needed after out-of-SSA). + + * .src: operand (type must be compatible with .target) + * .target: result of the operation + * .type: type of .target + +Type conversions +---------------- +They all have the following signature: + * .src: source value + * .orig_type: type of .src + * .target: result value + * .type: type of .target + +.. op:: OP_CAST + Cast to unsigned integer (and to void pointer). + +.. op:: OP_SCAST + Cast to signed integer. + +.. op:: OP_FPCAST + Cast to floating-point. + +.. op:: OP_PTRCAST + Cast to pointer. + +Ternary ops +----------- +.. op:: OP_SEL + * .src1: condition, must be of integral type + * .src2, .src3: operands (types must be compatible with .target) + * .target: result of the operation + * .type: type of .target + +.. op:: OP_RANGE + Range/bounds checking (only used for an unused sparse extension). + + * .src1: value to be checked + * .src2, src3: bound of the value (must be constants?) + * .type: type of .src[123]? + +Memory ops +---------- +.. op:: OP_LOAD + Load. + + * .src: base address to load from + * .offset: address offset + * .target: loaded value + * .type: type of .target + +.. op:: OP_STORE + Store. + + * .src: base address to store to + * .offset: address offset + * .target: value to be stored + * .type: type of .target + +Others +------ +.. op:: OP_SYMADDR + Create a pseudo corresponding to the address of a symbol. + + * .symbol: (pseudo_t) input symbol (alias .src) + * .target: symbol's address + +.. op:: OP_SETFVAL + Create a pseudo corresponding to a floating-point literal. + + * .fvalue: the literal's value (long double) + * .target: the corresponding pseudo + * .type: type of the literal & .target + +.. op:: OP_SETVAL + Create a pseudo corresponding to a string literal or a label-as-value. 
+ The value is given as an expression EXPR_STRING or EXPR_LABEL. + + * .val: (expression) input expression + * .target: the resulting value + * .type: type of .target, the value + +.. op:: OP_PHI + Phi-node (for SSA form). + + * .phi_list: phi-operands (type must be compatible with .target) + * .target: "result" + * .type: type of .target + +.. op:: OP_PHISOURCE + Phi-node source. + Like OP_COPY but exclusively used to give a defining instruction + (and thus also a type) to *all* OP_PHI operands. + + * .phi_src: operand (type must be compatible with .target, alias .src) + * .target: the "result" PSEUDO_PHI + * .type: type of .target + * .phi_users: list of phi instructions using the target pseudo + +.. op:: OP_CALL + Function call. + + * .func: (pseudo_t) the function (can be a symbol or a "register", + alias .src) + * .arguments: (pseudo_list) list of the associated arguments + * .target: function return value (if any) + * .type: type of .target + * .fntypes: (symbol_list) list of the function's types: the first + entry is the full function type, the next ones are the types of + each argument + +.. op:: OP_INLINED_CALL + Only used as an annotation to show that the instructions just above + correspond to a function that has been inlined. + + * .func: (pseudo_t) the function (must be a symbol, alias .src) + * .arguments: list of pseudos that were the function's arguments + * .target: function return value (if any) + * .type: type of .target + +.. op:: OP_SLICE + Extract a "slice" from an aggregate. + + * .base: (pseudo_t) aggregate (alias .src) + * .from, .len: offset & size of the "slice" within the aggregate + * .target: result + * .type: type of .target + +.. op:: OP_ASM + Inlined assembly code. + + * .string: asm template + * .asm_rules: asm constraints, rules + +Sparse tagging (line numbers, context, whatever) +------------------------------------------------ +.. op:: OP_CONTEXT + Currently only used for lock/unlock tracking. 
+ + * .context_expr: unused + * .increment: (1 for locking, -1 for unlocking) + * .check: (ignore the instruction if 0) + +Misc ops +-------- +.. op:: OP_ENTRY + Function entry point (no associated semantics). + +.. op:: OP_BADOP + Invalid operation (should never be generated). + +.. op:: OP_NOP + No-op (should never be generated). + +.. op:: OP_DEATHNOTE + Annotation telling the pseudo will be dead after the next + instruction (other than some other annotation, that is). + +.. # vim: tabstop=4 diff --git a/Documentation/api.rst b/Documentation/api.rst index d1a1d3ca..1270551c 100644 --- a/Documentation/api.rst +++ b/Documentation/api.rst @@ -9,6 +9,7 @@ Utilities ~~~~~~~~~ .. c:autodoc:: ptrlist.c +.. c:autodoc:: utils.h Parsing ~~~~~~~ diff --git a/Documentation/conf.py b/Documentation/conf.py index f7a68014..aae9d39b 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -29,6 +29,7 @@ needs_sphinx = '1.3' sys.path.insert(0, os.path.abspath('sphinx')) extensions = [ 'cdoc' + , 'ir' ] # support .md with python2 & python3 diff --git a/Documentation/doc-guide.rst b/Documentation/doc-guide.rst index 80ec82c2..8133cb3a 100644 --- a/Documentation/doc-guide.rst +++ b/Documentation/doc-guide.rst @@ -149,3 +149,7 @@ will be displayed like this: It's strongly encouraged to use this function instead of open coding a simple ``++``. + +Intermediate Representation +--------------------------- +.. 
c:autodoc:: Documentation/sphinx/ir.py diff --git a/Documentation/index.rst b/Documentation/index.rst index da006710..9825c8cd 100644 --- a/Documentation/index.rst +++ b/Documentation/index.rst @@ -20,8 +20,8 @@ Developer documentation test-suite dev-options - IR api + IR doc-guide How to contribute diff --git a/Documentation/sphinx/ir.py b/Documentation/sphinx/ir.py new file mode 100755 index 00000000..3028200a --- /dev/null +++ b/Documentation/sphinx/ir.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python +# SPDX-License-Identifier: MIT +# +# Copyright (C) 2018 Luc Van Oostenryck <luc.vanoostenryck@gmail.com> +# + +""" +/// +// To document the instructions used in the intermediate representation +// a new domain is defined: 'ir' with a directive:: +// +// .. op:: <OP_NAME> +// <description of OP_NAME> +// ... +// +// This is equivalent to using a definition list but with the name +// also placed in the index (with 'IR instruction' as descriptions). + +""" + +import docutils +import sphinx + +class IROpDirective(docutils.parsers.rst.Directive): + + # use the first line of content as the argument, this allows + # to not have to write a blank line after the directive + final_argument_whitespace = True + required_argument = 0 + #optional_arguments = 0 + has_content = True + + objtype = None + + def run(self): + self.env = self.state.document.settings.env + + source = self.state.document + lineno = self.lineno + text = self.content + name = text[0] + + node = docutils.nodes.section() + node['ids'].append(name) + node.document = source + + index = '.. 
index:: pair: %s; IR instruction' % name + content = docutils.statemachine.ViewList() + content.append(index, source, lineno) + content.append('' , source, lineno) + content.append(name , source, lineno) + content.append('' , source, lineno) + self.state.nested_parse(content, self.content_offset, node) + + defnode = docutils.nodes.definition() + self.state.nested_parse(text[1:], self.content_offset, defnode) + node.append(defnode) + + return [node] + +class IRDomain(sphinx.domains.Domain): + + """IR domain.""" + name = 'ir' + +def setup(app): + app.add_domain(IRDomain) + app.add_directive_to_domain('ir', 'op', IROpDirective) + + return { + 'version': '1.0', + 'parallel_read_safe': True, + } + +# vim: tabstop=4 @@ -61,6 +61,7 @@ LIB_OBJS += symbol.o LIB_OBJS += target.o LIB_OBJS += tokenize.o LIB_OBJS += unssa.o +LIB_OBJS += utils.o PROGRAMS := PROGRAMS += compile @@ -550,4 +550,9 @@ void declare_builtins(void) declare_builtin("__sync_synchronize", &void_ctype, 0, NULL); declare_builtin("__sync_val_compare_and_swap", &int_ctype, 1, &ptr_ctype, NULL); declare_builtin("__sync_xor_and_fetch", &int_ctype, 1, &ptr_ctype, NULL); + + // Blackfin-specific stuff + declare_builtin("__builtin_bfin_csync", &void_ctype, 0, NULL); + declare_builtin("__builtin_bfin_ssync", &void_ctype, 0, NULL); + declare_builtin("__builtin_bfin_norm_fr1x32", &int_ctype, 0, &int_ctype, NULL); } @@ -477,8 +477,8 @@ static void handle_arch_m64_finalize(void) switch (arch_m64) { case ARCH_X32: max_int_alignment = 8; - add_pre_buffer("#weak_define __ILP32__ 1\n"); - add_pre_buffer("#weak_define _ILP32 1\n"); + predefine("__ILP32__", 1, "1"); + predefine("_ILP32", 1, "1"); goto case_x86_64; case ARCH_LP32: /* default values */ @@ -488,15 +488,15 @@ static void handle_arch_m64_finalize(void) max_int_alignment = 8; size_t_ctype = &ulong_ctype; ssize_t_ctype = &long_ctype; - add_pre_buffer("#weak_define __LP64__ 1\n"); - add_pre_buffer("#weak_define _LP64 1\n"); + predefine("__LP64__", 1, "1"); + 
predefine("_LP64", 1, "1"); goto case_64bit_common; case ARCH_LLP64: bits_in_long = 32; max_int_alignment = 8; size_t_ctype = &ullong_ctype; ssize_t_ctype = &llong_ctype; - add_pre_buffer("#weak_define __LLP64__ 1\n"); + predefine("__LLP64__", 1, "1"); goto case_64bit_common; case_64bit_common: bits_in_pointer = 64; @@ -504,8 +504,8 @@ static void handle_arch_m64_finalize(void) /* fall through */ case_x86_64: #if defined(__x86_64__) || defined(__x86_64) - add_pre_buffer("#weak_define __x86_64__ 1\n"); - add_pre_buffer("#weak_define __x86_64 1\n"); + predefine("__x86_64__", 1, "1"); + predefine("__x86_64", 1, "1"); #endif break; } @@ -1111,14 +1111,19 @@ static char **handle_switch(char *arg, char **next) static void predefined_sizeof(const char *name, unsigned bits) { - add_pre_buffer("#weak_define __SIZEOF_%s__ %d\n", name, bits/8); + char buf[32]; + + snprintf(buf, sizeof(buf), "__SIZEOF_%s__", name); + predefine(buf, 1, "%d", bits/8); } static void predefined_max(const char *name, const char *suffix, unsigned bits) { unsigned long long max = (1ULL << (bits - 1 )) - 1; + char buf[32]; - add_pre_buffer("#weak_define __%s_MAX__ %#llx%s\n", name, max, suffix); + snprintf(buf, sizeof(buf), "__%s_MAX__", name); + predefine(buf, 1, "%#llx%s", max, suffix); } static void predefined_type_size(const char *name, const char *suffix, unsigned bits) @@ -1129,13 +1134,50 @@ static void predefined_type_size(const char *name, const char *suffix, unsigned static void predefined_macros(void) { - add_pre_buffer("#define __CHECKER__ 1\n"); + predefine("__CHECKER__", 0, "1"); + predefine("__GNUC__", 1, "%d", gcc_major); + predefine("__GNUC_MINOR__", 1, "%d", gcc_minor); + predefine("__GNUC_PATCHLEVEL__", 1, "%d", gcc_patchlevel); + + predefine("__STDC__", 1, "1"); + switch (standard) { + case STANDARD_C89: + predefine("__STRICT_ANSI__", 1, "1"); + break; + + case STANDARD_C94: + predefine("__STDC_VERSION__", 1, "199409L"); + predefine("__STRICT_ANSI__", 1, "1"); + break; + + case 
STANDARD_C99: + predefine("__STDC_VERSION__", 1, "199901L"); + predefine("__STRICT_ANSI__", 1, "1"); + break; + + case STANDARD_GNU89: + default: + break; + + case STANDARD_GNU99: + predefine("__STDC_VERSION__", 1, "199901L"); + break; + + case STANDARD_C11: + predefine("__STRICT_ANSI__", 1, "1"); + case STANDARD_GNU11: + predefine("__STDC_NO_ATOMICS__", 1, "1"); + predefine("__STDC_NO_COMPLEX__", 1, "1"); + predefine("__STDC_NO_THREADS__", 1, "1"); + predefine("__STDC_VERSION__", 1, "201112L"); + break; + } predefined_sizeof("SHORT", bits_in_short); predefined_max("SHRT", "", bits_in_short); predefined_max("SCHAR", "", bits_in_char); predefined_max("WCHAR", "", bits_in_wchar); - add_pre_buffer("#weak_define __CHAR_BIT__ %d\n", bits_in_char); + predefine("__CHAR_BIT__", 1, "%d", bits_in_char); predefined_type_size("INT", "", bits_in_int); predefined_type_size("LONG", "L", bits_in_long); @@ -1151,52 +1193,30 @@ static void predefined_macros(void) predefined_sizeof("DOUBLE", bits_in_double); predefined_sizeof("LONG_DOUBLE", bits_in_longdouble); - add_pre_buffer("#weak_define __%s_ENDIAN__ 1\n", - arch_big_endian ? "BIG" : "LITTLE"); - - add_pre_buffer("#weak_define __ORDER_LITTLE_ENDIAN__ 1234\n"); - add_pre_buffer("#weak_define __ORDER_BIG_ENDIAN__ 4321\n"); - add_pre_buffer("#weak_define __ORDER_PDP_ENDIAN__ 3412\n"); - add_pre_buffer("#weak_define __BYTE_ORDER__ __ORDER_%s_ENDIAN__\n", - arch_big_endian ? "BIG" : "LITTLE"); -} + predefine("__ORDER_LITTLE_ENDIAN__", 1, "1234"); + predefine("__ORDER_BIG_ENDIAN__", 1, "4321"); + predefine("__ORDER_PDP_ENDIAN__", 1, "3412"); + if (arch_big_endian) { + predefine("__BIG_ENDIAN__", 1, "1"); + predefine("__BYTE_ORDER__", 1, "__ORDER_BIG_ENDIAN__"); + } else { + predefine("__LITTLE_ENDIAN__", 1, "1"); + predefine("__BYTE_ORDER__", 1, "__ORDER_LITTLE_ENDIAN__"); + } -static void declare_builtin_functions(void) -{ - /* Note: - * Most builtin functions are declared in builtin.c:declare_builtins(). 
- * Some are also defined in builtin:init_builtins(). - */ + if (optimize_level) + predefine("__OPTIMIZE__", 0, "1"); + if (optimize_size) + predefine("__OPTIMIZE_SIZE__", 0, "1"); - /* Add Blackfin-specific stuff */ - add_pre_buffer( - "#ifdef __bfin__\n" - "extern void __builtin_bfin_csync(void);\n" - "extern void __builtin_bfin_ssync(void);\n" - "extern int __builtin_bfin_norm_fr1x32(int);\n" - "#endif\n" - ); + // Temporary hacks + predefine("__extension__", 0, NULL); + predefine("__pragma__", 0, NULL); } static void create_builtin_stream(void) { - add_pre_buffer("#weak_define __GNUC__ %d\n", gcc_major); - add_pre_buffer("#weak_define __GNUC_MINOR__ %d\n", gcc_minor); - add_pre_buffer("#weak_define __GNUC_PATCHLEVEL__ %d\n", gcc_patchlevel); - - /* add the multiarch include directories, if any */ - if (multiarch_dir && *multiarch_dir) { - add_pre_buffer("#add_system \"/usr/include/%s\"\n", multiarch_dir); - add_pre_buffer("#add_system \"/usr/local/include/%s\"\n", multiarch_dir); - } - - /* We add compiler headers path here because we have to parse - * the arguments to get it, falling back to default. */ - add_pre_buffer("#add_system \"%s/include\"\n", gcc_base_dir); - add_pre_buffer("#add_system \"%s/include-fixed\"\n", gcc_base_dir); - - add_pre_buffer("#define __extension__\n"); - add_pre_buffer("#define __pragma__\n"); + // Temporary hack add_pre_buffer("#define _Pragma(x)\n"); // gcc defines __SIZE_TYPE__ to be size_t. 
For linux/i86 and @@ -1208,44 +1228,19 @@ static void create_builtin_stream(void) add_pre_buffer("#weak_define __SIZE_TYPE__ long unsigned int\n"); else add_pre_buffer("#weak_define __SIZE_TYPE__ unsigned int\n"); - add_pre_buffer("#weak_define __STDC__ 1\n"); - - switch (standard) - { - case STANDARD_C89: - add_pre_buffer("#weak_define __STRICT_ANSI__\n"); - break; - - case STANDARD_C94: - add_pre_buffer("#weak_define __STDC_VERSION__ 199409L\n"); - add_pre_buffer("#weak_define __STRICT_ANSI__\n"); - break; - case STANDARD_C99: - add_pre_buffer("#weak_define __STDC_VERSION__ 199901L\n"); - add_pre_buffer("#weak_define __STRICT_ANSI__\n"); - break; - - case STANDARD_GNU89: - break; - - case STANDARD_GNU99: - add_pre_buffer("#weak_define __STDC_VERSION__ 199901L\n"); - break; - - case STANDARD_C11: - add_pre_buffer("#weak_define __STRICT_ANSI__ 1\n"); - case STANDARD_GNU11: - add_pre_buffer("#weak_define __STDC_NO_ATOMICS__ 1\n"); - add_pre_buffer("#weak_define __STDC_NO_COMPLEX__ 1\n"); - add_pre_buffer("#weak_define __STDC_NO_THREADS__ 1\n"); - add_pre_buffer("#weak_define __STDC_VERSION__ 201112L\n"); - break; - default: - assert (0); + /* add the multiarch include directories, if any */ + if (multiarch_dir && *multiarch_dir) { + add_pre_buffer("#add_system \"/usr/include/%s\"\n", multiarch_dir); + add_pre_buffer("#add_system \"/usr/local/include/%s\"\n", multiarch_dir); } + /* We add compiler headers path here because we have to parse + * the arguments to get it, falling back to default. 
*/ + add_pre_buffer("#add_system \"%s/include\"\n", gcc_base_dir); + add_pre_buffer("#add_system \"%s/include-fixed\"\n", gcc_base_dir); + add_pre_buffer("#define __has_builtin(x) 0\n"); add_pre_buffer("#define __builtin_stdarg_start(a,b) ((a) = (__builtin_va_list)(&(b)))\n"); add_pre_buffer("#define __builtin_va_start(a,b) ((a) = (__builtin_va_list)(&(b)))\n"); @@ -1258,11 +1253,6 @@ static void create_builtin_stream(void) add_pre_buffer("#define __builtin_va_end(arg)\n"); add_pre_buffer("#define __builtin_ms_va_end(arg)\n"); add_pre_buffer("#define __builtin_va_arg_pack()\n"); - - if (optimize_level) - add_pre_buffer("#define __OPTIMIZE__ 1\n"); - if (optimize_size) - add_pre_buffer("#define __OPTIMIZE_SIZE__ 1\n"); } static struct symbol_list *sparse_tokenstream(struct token *token) @@ -1376,11 +1366,9 @@ struct symbol_list *sparse_initialize(int argc, char **argv, struct string_list // Initialize type system init_ctype(); - declare_builtins(); - create_builtin_stream(); predefined_macros(); - if (!preprocess_only) - declare_builtin_functions(); + create_builtin_stream(); + declare_builtins(); list = sparse_initial(); @@ -33,6 +33,7 @@ #include "compat.h" #include "ptrlist.h" +#include "utils.h" #define DO_STRINGIFY(x) #x #define STRINGIFY(x) DO_STRINGIFY(x) @@ -129,6 +130,7 @@ enum phase { extern void add_pre_buffer(const char *fmt, ...) FORMAT_ATTR(1); +extern void predefine(const char *name, int weak, const char *fmt, ...) FORMAT_ATTR(3); extern int preprocess_only; diff --git a/pre-process.c b/pre-process.c index 547ce092..da4b7acd 100644 --- a/pre-process.c +++ b/pre-process.c @@ -553,13 +553,10 @@ static int merge(struct token *left, struct token *right) left->pos.noexpand = 0; return 1; - case TOKEN_NUMBER: { - char *number = __alloc_bytes(strlen(buffer) + 1); - memcpy(number, buffer, strlen(buffer) + 1); + case TOKEN_NUMBER: token_type(left) = TOKEN_NUMBER; /* could be . 
+ num */ - left->number = number; + left->number = xstrdup(buffer); return 1; - } case TOKEN_SPECIAL: if (buffer[2] && buffer[3]) @@ -910,8 +907,7 @@ static int try_include(const char *path, const char *filename, int flen, struct return 1; fd = open(fullname, O_RDONLY); if (fd >= 0) { - char * streamname = __alloc_bytes(plen + flen); - memcpy(streamname, fullname, plen + flen); + char *streamname = xmemdup(fullname, plen + flen); *where = tokenize(streamname, fd, *where, next_path); close(fd); return 1; @@ -1341,40 +1337,16 @@ Earg: return NULL; } -static int do_handle_define(struct stream *stream, struct token **line, struct token *token, int attr) +static int do_define(struct position pos, struct token *token, struct ident *name, + struct token *arglist, struct token *expansion, int attr) { - struct token *arglist, *expansion; - struct token *left = token->next; struct symbol *sym; - struct ident *name; - int ret; - - if (token_type(left) != TOKEN_IDENT) { - sparse_error(token->pos, "expected identifier to 'define'"); - return 1; - } - - name = left->ident; - - arglist = NULL; - expansion = left->next; - if (!expansion->pos.whitespace) { - if (match_op(expansion, '(')) { - arglist = expansion; - expansion = parse_arguments(expansion); - if (!expansion) - return 1; - } else if (!eof_token(expansion)) { - warning(expansion->pos, - "no whitespace before object-like macro body"); - } - } + int ret = 1; expansion = parse_expansion(expansion, arglist, name); if (!expansion) return 1; - ret = 1; sym = lookup_symbol(name, NS_MACRO | NS_UNDEF); if (sym) { int clean; @@ -1389,7 +1361,7 @@ static int do_handle_define(struct stream *stream, struct token **line, struct t ret = 0; if ((clean && attr == SYM_ATTR_NORMAL) || sym->used_in == file_scope) { - warning(left->pos, "preprocessor token %.*s redefined", + warning(pos, "preprocessor token %.*s redefined", name->len, name->name); info(sym->pos, "this was the original definition"); } @@ -1398,7 +1370,7 @@ static int 
do_handle_define(struct stream *stream, struct token **line, struct t } if (!sym || sym->scope != file_scope) { - sym = alloc_symbol(left->pos, SYM_NODE); + sym = alloc_symbol(pos, SYM_NODE); bind_symbol(sym, name, NS_MACRO); add_ident(&macros, name); ret = 0; @@ -1407,7 +1379,8 @@ static int do_handle_define(struct stream *stream, struct token **line, struct t if (!ret) { sym->expansion = expansion; sym->arglist = arglist; - __free_token(token); /* Free the "define" token, but not the rest of the line */ + if (token) /* Free the "define" token, but not the rest of the line */ + __free_token(token); } sym->namespace = NS_MACRO; @@ -1417,6 +1390,74 @@ out: return ret; } +/// +// predefine a macro with a printf-formatted value +// @name: the name of the macro +// @weak: 0/1 for a normal or a weak define +// @fmt: the printf format followed by its arguments. +// +// The type of the value is automatically inferred: +// TOKEN_NUMBER if it starts with a digit, TOKEN_IDENT otherwise. +// If @fmt is null or empty, the macro is defined with an empty definition. +void predefine(const char *name, int weak, const char *fmt, ...) +{ + struct ident *ident = built_in_ident(name); + struct token *value = &eof_token_entry; + int attr = weak ? 
SYM_ATTR_WEAK : SYM_ATTR_NORMAL; + + if (fmt && fmt[0]) { + static char buf[256]; + va_list ap; + + va_start(ap, fmt); + vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + value = __alloc_token(0); + if (isdigit(buf[0])) { + token_type(value) = TOKEN_NUMBER; + value->number = xstrdup(buf); + } else { + token_type(value) = TOKEN_IDENT; + value->ident = built_in_ident(buf); + } + value->pos.whitespace = 1; + value->next = &eof_token_entry; + } + + do_define(value->pos, NULL, ident, NULL, value, attr); +} + +static int do_handle_define(struct stream *stream, struct token **line, struct token *token, int attr) +{ + struct token *arglist, *expansion; + struct token *left = token->next; + struct ident *name; + + if (token_type(left) != TOKEN_IDENT) { + sparse_error(token->pos, "expected identifier to 'define'"); + return 1; + } + + name = left->ident; + + arglist = NULL; + expansion = left->next; + if (!expansion->pos.whitespace) { + if (match_op(expansion, '(')) { + arglist = expansion; + expansion = parse_arguments(expansion); + if (!expansion) + return 1; + } else if (!eof_token(expansion)) { + warning(expansion->pos, + "no whitespace before object-like macro body"); + } + } + + return do_define(left->pos, token, name, arglist, expansion, attr); +} + static int handle_define(struct stream *stream, struct token **line, struct token *token) { return do_handle_define(stream, line, token, SYM_ATTR_NORMAL); @@ -528,8 +528,7 @@ static int get_one_number(int c, int next, stream_t *stream) { struct token *token; static char buffer[4095]; - char *p = buffer, *buf, *buffer_end = buffer + sizeof (buffer); - int len; + char *p = buffer, *buffer_end = buffer + sizeof (buffer); *p++ = c; for (;;) { @@ -557,13 +556,9 @@ static int get_one_number(int c, int next, stream_t *stream) } *p++ = 0; - len = p - buffer; - buf = __alloc_bytes(len); - memcpy(buf, buffer, len); - token = stream->token; token_type(token) = TOKEN_NUMBER; - token->number = buf; + token->number = 
xmemdup(buffer, p - buffer); add_token(stream); return next; diff --git a/utils.c b/utils.c new file mode 100644 index 00000000..4945e1ca --- /dev/null +++ b/utils.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: MIT +// Copyright (C) 2018 Luc Van Oostenryck + +#include "utils.h" +#include "allocate.h" +#include <string.h> + + +void *xmemdup(const void *src, size_t len) +{ + return memcpy(__alloc_bytes(len), src, len); +} + +char *xstrdup(const char *src) +{ + return xmemdup(src, strlen(src) + 1); +} diff --git a/utils.h b/utils.h new file mode 100644 index 00000000..38749be2 --- /dev/null +++ b/utils.h @@ -0,0 +1,25 @@ +#ifndef UTILS_H +#define UTILS_H + +/// +// Miscellaneous utilities +// ----------------------- + +#include <stddef.h> + +/// +// duplicate a memory buffer in a newly allocated buffer. +// @src: a pointer to the memory buffer to be duplicated +// @len: the size of the memory buffer to be duplicated +// @return: a pointer to a copy of @src allocated via +// :func:`__alloc_bytes()`. +void *xmemdup(const void *src, size_t len); + +/// +// duplicate a null-terminated string in a newly allocated buffer. +// @src: a pointer to the string to be duplicated +// @return: a pointer to a copy of @src allocated via +// :func:`__alloc_bytes()`. +char *xstrdup(const char *src); + +#endif diff --git a/validation/preprocessor/builtin.c b/validation/preprocessor/builtin.c new file mode 100644 index 00000000..6c3aa176 --- /dev/null +++ b/validation/preprocessor/builtin.c @@ -0,0 +1,17 @@ +__CHECKER__ +F(__CHECKER__,__CHECKER__) +S(#__CHECKER__) +const char str[] = "__CHECKER__"; + +/* + * check-name: builtin + * check-command: sparse -E $file + * + * check-output-start + +1 +F(1,1) +S(#1) +const char str[] = "__CHECKER__"; + * check-output-end + */ |
