diff options
36 files changed, 2020 insertions, 14 deletions
@@ -23,6 +23,7 @@ example test-unssa ctags c2xml +sparse-llvm # tags tags diff --git a/Documentation/sparse.txt b/Documentation/sparse.txt new file mode 100644 index 00000000..061791eb --- /dev/null +++ b/Documentation/sparse.txt @@ -0,0 +1,45 @@ +Sparse +~~~~~~ + +__nocast vs __bitwise: + +__nocast warns about explicit or implicit casting to different types. + +HOWEVER, it doesn't consider two 32-bit integers to be different +types, so a __nocast 'int' type may be returned as a regular 'int' +type and then the __nocast is lost. + +So "__nocast" on integer types is usually not that powerful. It just +gets lost too easily. It's more useful for things like pointers. It +also doesn't warn about the mixing: you can add integers to __nocast +integer types, and it's not really considered anything wrong. + +__bitwise ends up being a "stronger integer separation". That one +doesn't allow you to mix with non-bitwise integers, so now it's much +harder to lose the type by mistake. + +So the basic rule is: + + - "__nocast" on its own tends to be more useful for *big* integers +that still need to act like integers, but you want to make it much +less likely that they get truncated by mistake. So a 64-bit integer +that you don't want to mistakenly/silently be returned as "int", for +example. But they mix well with random integer types, so you can add +to them etc without using anything special. However, that mixing also +means that the __nocast really gets lost fairly easily. + + - "__bitwise" is for *unique types* that cannot be mixed with other +types, and that you'd never want to just use as a random integer (the +integer 0 is special, though, and gets silently accepted iirc - it's +kind of like "NULL" for pointers). So "gfp_t" or the "safe endianness" +types would be __bitwise: you can only operate on them by doing +specific operations that know about *that* particular type. + +Generally, you want __bitwise if you are looking for type safety. 
+"__nocast" really is pretty weak. + +Reference: + +* Linus' e-mail about __nocast vs __bitwise: + + http://article.gmane.org/gmane.linux.kernel.mm/75784 @@ -88,7 +88,7 @@ A. Yeah, well... It parses a fairly complete subset of "extended C" as Q. What other sparse resources are available? -A. Website: http://www.kernel.org/pub/software/devel/sparse/ +A. Wiki: http://sparse.wiki.kernel.org/index.php/Main_Page Mailing list: linux-sparse@vger.kernel.org See http://vger.kernel.org/vger-lists.html#linux-sparse for subscription @@ -7,6 +7,7 @@ CC = gcc CFLAGS = -O2 -finline-functions -fno-strict-aliasing -g CFLAGS += -Wall -Wwrite-strings LDFLAGS += -g +LD = gcc AR = ar ALL_CFLAGS = $(CFLAGS) $(BASIC_CFLAGS) @@ -21,6 +22,9 @@ HAVE_GCC_DEP:=$(shell touch .gcc-test.c && \ $(CC) -c -Wp,-MD,.gcc-test.d .gcc-test.c 2>/dev/null && \ echo 'yes'; rm -f .gcc-test.d .gcc-test.o .gcc-test.c) HAVE_GTK2:=$(shell pkg-config --exists gtk+-2.0 2>/dev/null && echo 'yes') +HAVE_LLVM:=$(shell llvm-config --version >/dev/null 2>&1 && echo 'yes') +HAVE_LLVM_VERSION:=$(shell llvm-config --version | grep "^[3-9].*" >/dev/null 2>&1 && echo yes) +LLVM_VERSION=$(shell llvm-config --version) GCC_BASE = $(shell $(CC) --print-file-name=) BASIC_CFLAGS = -DGCC_BASE=\"$(GCC_BASE)\" @@ -63,6 +67,26 @@ else $(warning Your system does not have libgtk2, disabling test-inspect) endif +ifneq ($(HAVE_LLVM),yes) +$(warning Your system does not have llvm, disabling sparse-llvm) +else +ifneq ($(HAVE_LLVM_VERSION),yes) +$(warning LLVM 3.0 or later required. Your system has version $(LLVM_VERSION) installed.) 
+HAVE_LLVM=no +else +LLVM_PROGS := sparse-llvm +$(LLVM_PROGS): LD := g++ +LDFLAGS += $(shell llvm-config --ldflags) +LLVM_CFLAGS := $(shell llvm-config --cflags | sed -e "s/-DNDEBUG//g") +LLVM_LIBS := $(shell llvm-config --libs) +PROGRAMS += $(LLVM_PROGS) +INST_PROGRAMS += sparse-llvm sparsec +sparse-llvm_EXTRA_DEPS := sparse-llvm.o +sparse-llvm.o $(sparse-llvm_EXTRA_DEPS): BASIC_CFLAGS += $(LLVM_CFLAGS) +sparse-llvm_EXTRA_OBJS := $(LLVM_LIBS) +endif +endif + LIB_H= token.h parse.h lib.h symbol.h scope.h expression.h target.h \ linearize.h bitmap.h ident-list.h compat.h flow.h allocate.h \ storage.h ptrlist.h dissect.h @@ -141,7 +165,7 @@ compile_EXTRA_DEPS = compile-i386.o $(foreach p,$(PROGRAMS),$(eval $(p): $($(p)_EXTRA_DEPS) $(LIBS))) $(PROGRAMS): % : %.o - $(QUIET_LINK)$(CC) $(LDFLAGS) -o $@ $^ $($@_EXTRA_OBJS) + $(QUIET_LINK)$(LD) $(LDFLAGS) -o $@ $^ $($@_EXTRA_OBJS) $(LIB_FILE): $(LIB_OBJS) $(QUIET_AR)$(AR) rcs $@ $(LIB_OBJS) diff --git a/compile-i386.c b/compile-i386.c index da3ee497..b4709525 100644 --- a/compile-i386.c +++ b/compile-i386.c @@ -761,10 +761,6 @@ static void func_cleanup(struct function *f) struct storage *stor; struct atom *atom; - FOR_EACH_PTR(f->pseudo_list, stor) { - free(stor); - } END_FOR_EACH_PTR(stor); - FOR_EACH_PTR(f->atom_list, atom) { if ((atom->type == ATOM_TEXT) && (atom->text)) free(atom->text); @@ -775,6 +771,10 @@ static void func_cleanup(struct function *f) free(atom); } END_FOR_EACH_PTR(atom); + FOR_EACH_PTR(f->pseudo_list, stor) { + free(stor); + } END_FOR_EACH_PTR(stor); + free_ptr_list(&f->pseudo_list); free(f); } @@ -3320,6 +3320,16 @@ static void evaluate_switch_statement(struct statement *stmt) } END_FOR_EACH_PTR(sym); } +static void evaluate_goto_statement(struct statement *stmt) +{ + struct symbol *label = stmt->goto_label; + + if (label && !label->stmt && !lookup_keyword(label->ident, NS_KEYWORD)) + sparse_error(stmt->pos, "label '%s' was not declared", show_ident(label->ident)); + + 
evaluate_expression(stmt->goto_expression); +} + struct symbol *evaluate_statement(struct statement *stmt) { if (!stmt) @@ -3378,7 +3388,7 @@ struct symbol *evaluate_statement(struct statement *stmt) case STMT_LABEL: return evaluate_statement(stmt->label_statement); case STMT_GOTO: - evaluate_expression(stmt->goto_expression); + evaluate_goto_statement(stmt); return NULL; case STMT_NONE: break; diff --git a/expression.c b/expression.c index 482e2b17..d2437c74 100644 --- a/expression.c +++ b/expression.c @@ -604,7 +604,7 @@ static struct token *unary_expression(struct token *token, struct expression **t { &__sizeof_ptr___ident, EXPR_PTRSIZEOF }, }; int i; - for (i = 0; i < 3; i++) { + for (i = 0; i < ARRAY_SIZE(type_information); i++) { if (ident == type_information[i].id) return type_info_expression(token, tree, type_information[i].type); } diff --git a/ident-list.h b/ident-list.h index b12d1720..e00cd964 100644 --- a/ident-list.h +++ b/ident-list.h @@ -88,6 +88,10 @@ IDENT(dllimport); IDENT(__dllimport__); IDENT(dllexport); IDENT(__dllexport__); IDENT(restrict); IDENT(__restrict); IDENT(artificial); IDENT(__artificial__); +IDENT(leaf); IDENT(__leaf__); +IDENT(vector_size); IDENT(__vector_size__); +IDENT(error); IDENT(__error__); + /* Preprocessor idents. 
Direct use of __IDENT avoids mentioning the keyword * itself by name, preventing these tokens from expanding when compiling @@ -224,6 +224,15 @@ static enum { STANDARD_C89, STANDARD_GNU89, STANDARD_GNU99, } standard = STANDARD_GNU89; +#ifdef __x86_64__ +#define ARCH_M64_DEFAULT 1 +#else +#define ARCH_M64_DEFAULT 0 +#endif + +int arch_m64 = ARCH_M64_DEFAULT; +int arch_msize_long = 0; + #define CMDLINE_INCLUDE 20 static int cmdline_include_nr = 0; static char *cmdline_include[CMDLINE_INCLUDE]; @@ -337,19 +346,45 @@ static char **handle_switch_M(char *arg, char **next) static char **handle_switch_m(char *arg, char **next) { if (!strcmp(arg, "m64")) { + arch_m64 = 1; + } else if (!strcmp(arg, "m32")) { + arch_m64 = 0; + } else if (!strcmp(arg, "msize-long")) { + arch_msize_long = 1; + } + return next; +} + +static void handle_arch_m64_finalize(void) +{ + if (arch_m64) { bits_in_long = 64; max_int_alignment = 8; bits_in_pointer = 64; pointer_alignment = 8; size_t_ctype = &ulong_ctype; ssize_t_ctype = &long_ctype; - } else if (!strcmp(arg, "msize-long")) { +#ifdef __x86_64__ + add_pre_buffer("#weak_define __x86_64__ 1\n"); +#endif + } +} + +static void handle_arch_msize_long_finalize(void) +{ + if (arch_msize_long) { size_t_ctype = &ulong_ctype; ssize_t_ctype = &long_ctype; } - return next; } +static void handle_arch_finalize(void) +{ + handle_arch_m64_finalize(); + handle_arch_msize_long_finalize(); +} + + static char **handle_switch_o(char *arg, char **next) { if (!strcmp (arg, "o")) { // "-o foo" @@ -675,6 +710,7 @@ void declare_builtin_functions(void) add_pre_buffer("extern __SIZE_TYPE__ __builtin_strspn(const char *, const char *);\n"); add_pre_buffer("extern __SIZE_TYPE__ __builtin_strcspn(const char *, const char *);\n"); add_pre_buffer("extern char * __builtin_strpbrk(const char *, const char *);\n"); + add_pre_buffer("extern char* __builtin_stpcpy(const char *, const char*);\n"); add_pre_buffer("extern __SIZE_TYPE__ __builtin_strlen(const char *);\n"); /* And 
bitwise operations.. */ @@ -707,6 +743,8 @@ void declare_builtin_functions(void) add_pre_buffer("extern long __builtin_alpha_cmpbge(long, long);\n"); add_pre_buffer("extern long __builtin_labs(long);\n"); add_pre_buffer("extern double __builtin_fabs(double);\n"); + add_pre_buffer("extern void __sync_synchronize();\n"); + add_pre_buffer("extern int __sync_bool_compare_and_swap(void *, ...);\n"); /* Add Blackfin-specific stuff */ add_pre_buffer( @@ -918,6 +956,8 @@ struct symbol_list *sparse_initialize(int argc, char **argv, struct string_list handle_switch_W_finalize(); handle_switch_v_finalize(); + handle_arch_finalize(); + list = NULL; if (!ptr_list_empty(filelist)) { // Initialize type system diff --git a/linearize.c b/linearize.c index 32097274..1d15cfde 100644 --- a/linearize.c +++ b/linearize.c @@ -1195,6 +1195,7 @@ static pseudo_t linearize_call_expression(struct entrypoint *ep, struct expressi struct instruction *insn = alloc_typed_instruction(OP_CALL, expr->ctype); pseudo_t retval, call; struct ctype *ctype = NULL; + struct symbol *fntype; struct context *context; if (!expr->ctype) { @@ -1212,6 +1213,13 @@ static pseudo_t linearize_call_expression(struct entrypoint *ep, struct expressi if (fn->ctype) ctype = &fn->ctype->ctype; + fntype = fn->ctype; + if (fntype) { + if (fntype->type == SYM_NODE) + fntype = fntype->ctype.base_type; + } + insn->fntype = fntype; + if (fn->type == EXPR_PREOP) { if (fn->unop->type == EXPR_SYMBOL) { struct symbol *sym = fn->unop->symbol; diff --git a/linearize.h b/linearize.h index 50b36018..61fbd831 100644 --- a/linearize.h +++ b/linearize.h @@ -38,6 +38,7 @@ struct pseudo { struct instruction *def; long long value; }; + void *priv; }; extern struct pseudo void_pseudo; @@ -115,6 +116,7 @@ struct instruction { struct /* call */ { pseudo_t func; struct pseudo_list *arguments; + struct symbol *fntype; }; struct /* context */ { int increment; @@ -231,6 +233,7 @@ struct basic_block { struct basic_block_list *children; /* destinations 
*/ struct instruction_list *insns; /* Linear list of instructions */ struct pseudo_list *needs, *defines; + void *priv; }; static inline int is_branch_goto(struct instruction *br) @@ -509,6 +509,8 @@ const char *ignored_attributes[] = { "__dllexport__", "dllimport", "__dllimport__", + "error", + "__error__", "externally_visible", "__externally_visible__", "fastcall", @@ -519,6 +521,8 @@ const char *ignored_attributes[] = { "__format_arg__", "hot", "__hot__", + "leaf", + "__leaf__", "l1_text", "__l1_text__", "l1_data", @@ -568,6 +572,7 @@ const char *ignored_attributes[] = { "used", "__used__", "vector_size", + "__vector_size__", "visibility", "__visibility__", "warn_unused_result", @@ -2274,8 +2279,12 @@ static struct token *statement(struct token *token, struct statement **tree) return s->op->statement(token, stmt); if (match_op(token->next, ':')) { + struct symbol *s = label_symbol(token); stmt->type = STMT_LABEL; - stmt->label_identifier = label_symbol(token); + stmt->label_identifier = s; + if (s->stmt) + sparse_error(stmt->pos, "label '%s' redefined", show_ident(token->ident)); + s->stmt = stmt; token = skip_attributes(token->next->next); return statement(token, &stmt->label_statement); } @@ -35,7 +35,7 @@ int ptr_list_size(struct ptr_list *head) * * The array to linearize into (second argument) should really * be "void *x[]", but we want to let people fill in any kind - * of pointer array, so let's just call it "void *". + * of pointer array, so let's just call it "void **". 
*/ int linearize_ptr_list(struct ptr_list *head, void **arr, int max) { @@ -10,6 +10,7 @@ #include "expression.h" #include "linearize.h" #include "flow.h" +#include "symbol.h" /* Find the trivial parent for a phi-source */ static struct basic_block *phi_parent(struct basic_block *source, pseudo_t pseudo) @@ -667,6 +668,11 @@ static int simplify_cast(struct instruction *insn) orig_type = insn->orig_type; if (!orig_type) return 0; + + /* Keep casts with pointer on either side (not only case of OP_PTRCAST) */ + if (is_ptr_type(orig_type) || is_ptr_type(insn->type)) + return 0; + orig_size = orig_type->bit_size; size = insn->size; src = insn->src; diff --git a/sparse-llvm.c b/sparse-llvm.c new file mode 100644 index 00000000..6f2fbd69 --- /dev/null +++ b/sparse-llvm.c @@ -0,0 +1,1281 @@ +/* + * Example usage: + * ./sparse-llvm hello.c | llc | as -o hello.o + */ + +#include <llvm-c/Core.h> +#include <llvm-c/BitWriter.h> +#include <llvm-c/Analysis.h> + +#include <stdbool.h> +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <assert.h> + +#include "symbol.h" +#include "expression.h" +#include "linearize.h" +#include "flow.h" + +struct phi_fwd { + struct phi_fwd *next; + + LLVMValueRef phi; + pseudo_t pseudo; + bool resolved; +}; + +struct function { + LLVMBuilderRef builder; + LLVMTypeRef type; + LLVMValueRef fn; + LLVMModuleRef module; + + struct phi_fwd *fwd_list; +}; + +static inline bool symbol_is_fp_type(struct symbol *sym) +{ + if (!sym) + return false; + + return sym->ctype.base_type == &fp_type; +} + +static LLVMTypeRef symbol_type(LLVMModuleRef module, struct symbol *sym); + +static LLVMTypeRef func_return_type(LLVMModuleRef module, struct symbol *sym) +{ + return symbol_type(module, sym->ctype.base_type); +} + +static LLVMTypeRef sym_func_type(LLVMModuleRef module, struct symbol *sym) +{ + LLVMTypeRef *arg_type; + LLVMTypeRef func_type; + LLVMTypeRef ret_type; + struct symbol *arg; + int n_arg = 0; + + /* to avoid strangeness with varargs 
[for now], we build + * the function and type anew, for each call. This + * is probably wrong. We should look up the + * symbol declaration info. + */ + + ret_type = func_return_type(module, sym); + + /* count args, build argument type information */ + FOR_EACH_PTR(sym->arguments, arg) { + n_arg++; + } END_FOR_EACH_PTR(arg); + + arg_type = calloc(n_arg, sizeof(LLVMTypeRef)); + + int idx = 0; + FOR_EACH_PTR(sym->arguments, arg) { + struct symbol *arg_sym = arg->ctype.base_type; + + arg_type[idx++] = symbol_type(module, arg_sym); + } END_FOR_EACH_PTR(arg); + func_type = LLVMFunctionType(ret_type, arg_type, n_arg, + sym->ctype.base_type->variadic); + + return func_type; +} + +static LLVMTypeRef sym_array_type(LLVMModuleRef module, struct symbol *sym) +{ + LLVMTypeRef elem_type; + struct symbol *base_type; + + base_type = sym->ctype.base_type; + + elem_type = symbol_type(module, base_type); + if (!elem_type) + return NULL; + + return LLVMArrayType(elem_type, sym->bit_size / 8); +} + +#define MAX_STRUCT_MEMBERS 64 + +static LLVMTypeRef sym_struct_type(LLVMModuleRef module, struct symbol *sym) +{ + LLVMTypeRef elem_types[MAX_STRUCT_MEMBERS]; + struct symbol *member; + char buffer[256]; + LLVMTypeRef ret; + unsigned nr = 0; + + sprintf(buffer, "%.*s", sym->ident->len, sym->ident->name); + + ret = LLVMGetTypeByName(module, buffer); + if (ret) + return ret; + + ret = LLVMStructCreateNamed(LLVMGetGlobalContext(), buffer); + + FOR_EACH_PTR(sym->symbol_list, member) { + LLVMTypeRef member_type; + + assert(nr < MAX_STRUCT_MEMBERS); + + member_type = symbol_type(module, member); + + elem_types[nr++] = member_type; + } END_FOR_EACH_PTR(member); + + LLVMStructSetBody(ret, elem_types, nr, 0 /* packed? */); + return ret; +} + +static LLVMTypeRef sym_union_type(LLVMModuleRef module, struct symbol *sym) +{ + LLVMTypeRef elements; + unsigned union_size; + + /* + * There's no union support in the LLVM API so we treat unions as + * opaque structs. 
The downside is that we lose type information on the + * members but as LLVM doesn't care, neither do we. + */ + union_size = sym->bit_size / 8; + + elements = LLVMArrayType(LLVMInt8Type(), union_size); + + return LLVMStructType(&elements, 1, 0 /* packed? */); +} + +static LLVMTypeRef sym_ptr_type(LLVMModuleRef module, struct symbol *sym) +{ + LLVMTypeRef type; + + /* 'void *' is treated like 'char *' */ + if (is_void_type(sym->ctype.base_type)) + type = LLVMInt8Type(); + else + type = symbol_type(module, sym->ctype.base_type); + + return LLVMPointerType(type, 0); +} + +static LLVMTypeRef sym_basetype_type(struct symbol *sym) +{ + LLVMTypeRef ret = NULL; + + if (symbol_is_fp_type(sym)) { + switch (sym->bit_size) { + case 32: + ret = LLVMFloatType(); + break; + case 64: + ret = LLVMDoubleType(); + break; + case 80: + ret = LLVMX86FP80Type(); + break; + default: + die("invalid bit size %d for type %d", sym->bit_size, sym->type); + break; + } + } else { + switch (sym->bit_size) { + case -1: + ret = LLVMVoidType(); + break; + case 1: + ret = LLVMInt1Type(); + break; + case 8: + ret = LLVMInt8Type(); + break; + case 16: + ret = LLVMInt16Type(); + break; + case 32: + ret = LLVMInt32Type(); + break; + case 64: + ret = LLVMInt64Type(); + break; + default: + die("invalid bit size %d for type %d", sym->bit_size, sym->type); + break; + } + } + + return ret; +} + +static LLVMTypeRef symbol_type(LLVMModuleRef module, struct symbol *sym) +{ + LLVMTypeRef ret = NULL; + + switch (sym->type) { + case SYM_BITFIELD: + case SYM_ENUM: + case SYM_NODE: + ret = symbol_type(module, sym->ctype.base_type); + break; + case SYM_BASETYPE: + ret = sym_basetype_type(sym); + break; + case SYM_PTR: + ret = sym_ptr_type(module, sym); + break; + case SYM_UNION: + ret = sym_union_type(module, sym); + break; + case SYM_STRUCT: + ret = sym_struct_type(module, sym); + break; + case SYM_ARRAY: + ret = sym_array_type(module, sym); + break; + case SYM_FN: + ret = sym_func_type(module, sym); + break; + 
default: + assert(0); + } + return ret; +} + +static LLVMTypeRef insn_symbol_type(LLVMModuleRef module, struct instruction *insn) +{ + if (insn->type) + return symbol_type(module, insn->type); + + switch (insn->size) { + case 8: return LLVMInt8Type(); + case 16: return LLVMInt16Type(); + case 32: return LLVMInt32Type(); + case 64: return LLVMInt64Type(); + + default: + die("invalid bit size %d", insn->size); + break; + } + + return NULL; /* not reached */ +} + +static LLVMLinkage data_linkage(struct symbol *sym) +{ + if (sym->ctype.modifiers & MOD_STATIC) + return LLVMPrivateLinkage; + + return LLVMExternalLinkage; +} + +static LLVMLinkage function_linkage(struct symbol *sym) +{ + if (sym->ctype.modifiers & MOD_STATIC) + return LLVMInternalLinkage; + + return LLVMExternalLinkage; +} + +#define MAX_PSEUDO_NAME 64 + +static void pseudo_name(pseudo_t pseudo, char *buf) +{ + switch (pseudo->type) { + case PSEUDO_REG: + snprintf(buf, MAX_PSEUDO_NAME, "R%d", pseudo->nr); + break; + case PSEUDO_SYM: + assert(0); + break; + case PSEUDO_VAL: + assert(0); + break; + case PSEUDO_ARG: { + assert(0); + break; + } + case PSEUDO_PHI: + snprintf(buf, MAX_PSEUDO_NAME, "PHI%d", pseudo->nr); + break; + default: + assert(0); + } +} + +static LLVMValueRef pseudo_to_value(struct function *fn, struct instruction *insn, pseudo_t pseudo) +{ + LLVMValueRef result = NULL; + + switch (pseudo->type) { + case PSEUDO_REG: + result = pseudo->priv; + break; + case PSEUDO_SYM: { + struct symbol *sym = pseudo->sym; + struct expression *expr; + + assert(sym->bb_target == NULL); + + expr = sym->initializer; + if (expr) { + switch (expr->type) { + case EXPR_STRING: { + const char *s = expr->string->data; + LLVMValueRef indices[] = { LLVMConstInt(LLVMInt64Type(), 0, 0), LLVMConstInt(LLVMInt64Type(), 0, 0) }; + LLVMValueRef data; + + data = LLVMAddGlobal(fn->module, LLVMArrayType(LLVMInt8Type(), strlen(s) + 1), ".str"); + LLVMSetLinkage(data, LLVMPrivateLinkage); + LLVMSetGlobalConstant(data, 1); + 
LLVMSetInitializer(data, LLVMConstString(strdup(s), strlen(s) + 1, true)); + + result = LLVMConstGEP(data, indices, ARRAY_SIZE(indices)); + break; + } + case EXPR_SYMBOL: { + struct symbol *sym = expr->symbol; + + result = LLVMGetNamedGlobal(fn->module, show_ident(sym->ident)); + assert(result != NULL); + break; + } + default: + assert(0); + } + } else { + const char *name = show_ident(sym->ident); + + result = LLVMGetNamedGlobal(fn->module, name); + if (!result) { + LLVMTypeRef type = symbol_type(fn->module, sym); + result = LLVMAddGlobal(fn->module, type, name); + } + } + break; + } + case PSEUDO_VAL: + result = LLVMConstInt(insn_symbol_type(fn->module, insn), pseudo->value, 1); + break; + case PSEUDO_ARG: { + result = LLVMGetParam(fn->fn, pseudo->nr - 1); + break; + } + case PSEUDO_PHI: + result = pseudo->priv; + break; + case PSEUDO_VOID: + result = NULL; + break; + default: + assert(0); + } + + return result; +} + +static LLVMTypeRef pseudo_type(struct function *fn, struct instruction *insn, pseudo_t pseudo) +{ + LLVMValueRef v; + LLVMTypeRef result = NULL; + + if (pseudo->priv) { + v = pseudo->priv; + return LLVMTypeOf(v); + } + + switch (pseudo->type) { + case PSEUDO_REG: + result = symbol_type(fn->module, pseudo->def->type); + break; + case PSEUDO_SYM: { + struct symbol *sym = pseudo->sym; + struct expression *expr; + + assert(sym->bb_target == NULL); + assert(sym->ident == NULL); + + expr = sym->initializer; + if (expr) { + switch (expr->type) { + case EXPR_STRING: + result = LLVMPointerType(LLVMInt8Type(), 0); + break; + default: + assert(0); + } + } + break; + } + case PSEUDO_VAL: + result = insn_symbol_type(fn->module, insn); + break; + case PSEUDO_ARG: + result = LLVMTypeOf(LLVMGetParam(fn->fn, pseudo->nr - 1)); + break; + case PSEUDO_PHI: + assert(0); + break; + case PSEUDO_VOID: + result = LLVMVoidType(); + break; + default: + assert(0); + } + + return result; +} + +static LLVMRealPredicate translate_fop(int opcode) +{ + static const 
LLVMRealPredicate trans_tbl[] = { + [OP_SET_EQ] = LLVMRealOEQ, + [OP_SET_NE] = LLVMRealUNE, + [OP_SET_LE] = LLVMRealOLE, + [OP_SET_GE] = LLVMRealOGE, + [OP_SET_LT] = LLVMRealOLT, + [OP_SET_GT] = LLVMRealOGT, + /* Are these used with FP? */ + [OP_SET_B] = LLVMRealOLT, + [OP_SET_A] = LLVMRealOGT, + [OP_SET_BE] = LLVMRealOLE, + [OP_SET_AE] = LLVMRealOGE, + }; + + return trans_tbl[opcode]; +} + +static LLVMIntPredicate translate_op(int opcode) +{ + static const LLVMIntPredicate trans_tbl[] = { + [OP_SET_EQ] = LLVMIntEQ, + [OP_SET_NE] = LLVMIntNE, + [OP_SET_LE] = LLVMIntSLE, + [OP_SET_GE] = LLVMIntSGE, + [OP_SET_LT] = LLVMIntSLT, + [OP_SET_GT] = LLVMIntSGT, + [OP_SET_B] = LLVMIntULT, + [OP_SET_A] = LLVMIntUGT, + [OP_SET_BE] = LLVMIntULE, + [OP_SET_AE] = LLVMIntUGE, + }; + + return trans_tbl[opcode]; +} + +static void output_op_binary(struct function *fn, struct instruction *insn) +{ + LLVMValueRef lhs, rhs, target; + char target_name[64]; + + lhs = pseudo_to_value(fn, insn, insn->src1); + + rhs = pseudo_to_value(fn, insn, insn->src2); + + pseudo_name(insn->target, target_name); + + switch (insn->opcode) { + /* Binary */ + case OP_ADD: + if (symbol_is_fp_type(insn->type)) + target = LLVMBuildFAdd(fn->builder, lhs, rhs, target_name); + else + target = LLVMBuildAdd(fn->builder, lhs, rhs, target_name); + break; + case OP_SUB: + if (symbol_is_fp_type(insn->type)) + target = LLVMBuildFSub(fn->builder, lhs, rhs, target_name); + else + target = LLVMBuildSub(fn->builder, lhs, rhs, target_name); + break; + case OP_MULU: + if (symbol_is_fp_type(insn->type)) + target = LLVMBuildFMul(fn->builder, lhs, rhs, target_name); + else + target = LLVMBuildMul(fn->builder, lhs, rhs, target_name); + break; + case OP_MULS: + assert(!symbol_is_fp_type(insn->type)); + target = LLVMBuildMul(fn->builder, lhs, rhs, target_name); + break; + case OP_DIVU: + if (symbol_is_fp_type(insn->type)) + target = LLVMBuildFDiv(fn->builder, lhs, rhs, target_name); + else + target = LLVMBuildUDiv(fn->builder, lhs, 
rhs, target_name); + break; + case OP_DIVS: + assert(!symbol_is_fp_type(insn->type)); + target = LLVMBuildSDiv(fn->builder, lhs, rhs, target_name); + break; + case OP_MODU: + assert(!symbol_is_fp_type(insn->type)); + target = LLVMBuildURem(fn->builder, lhs, rhs, target_name); + break; + case OP_MODS: + assert(!symbol_is_fp_type(insn->type)); + target = LLVMBuildSRem(fn->builder, lhs, rhs, target_name); + break; + case OP_SHL: + assert(!symbol_is_fp_type(insn->type)); + target = LLVMBuildShl(fn->builder, lhs, rhs, target_name); + break; + case OP_LSR: + assert(!symbol_is_fp_type(insn->type)); + target = LLVMBuildLShr(fn->builder, lhs, rhs, target_name); + break; + case OP_ASR: + assert(!symbol_is_fp_type(insn->type)); + target = LLVMBuildAShr(fn->builder, lhs, rhs, target_name); + break; + + /* Logical */ + case OP_AND: + assert(!symbol_is_fp_type(insn->type)); + target = LLVMBuildAnd(fn->builder, lhs, rhs, target_name); + break; + case OP_OR: + assert(!symbol_is_fp_type(insn->type)); + target = LLVMBuildOr(fn->builder, lhs, rhs, target_name); + break; + case OP_XOR: + assert(!symbol_is_fp_type(insn->type)); + target = LLVMBuildXor(fn->builder, lhs, rhs, target_name); + break; + case OP_AND_BOOL: { + LLVMValueRef x, y; + + assert(!symbol_is_fp_type(insn->type)); + + y = LLVMBuildICmp(fn->builder, LLVMIntNE, lhs, LLVMConstInt(LLVMTypeOf(lhs), 0, 0), "y"); + x = LLVMBuildICmp(fn->builder, LLVMIntNE, rhs, LLVMConstInt(LLVMTypeOf(rhs), 0, 0), "x"); + + target = LLVMBuildAnd(fn->builder, y, x, target_name); + break; + } + case OP_OR_BOOL: { + LLVMValueRef tmp; + + assert(!symbol_is_fp_type(insn->type)); + + tmp = LLVMBuildOr(fn->builder, rhs, lhs, "tmp"); + + target = LLVMBuildICmp(fn->builder, LLVMIntNE, tmp, LLVMConstInt(LLVMTypeOf(tmp), 0, 0), target_name); + break; + } + + /* Binary comparison */ + case OP_BINCMP ... 
OP_BINCMP_END: { + if (LLVMGetTypeKind(LLVMTypeOf(lhs)) == LLVMIntegerTypeKind) { + LLVMIntPredicate op = translate_op(insn->opcode); + + target = LLVMBuildICmp(fn->builder, op, lhs, rhs, target_name); + } else { + LLVMRealPredicate op = translate_fop(insn->opcode); + + target = LLVMBuildFCmp(fn->builder, op, lhs, rhs, target_name); + } + break; + } + default: + assert(0); + break; + } + + insn->target->priv = target; +} + +static void output_op_ret(struct function *fn, struct instruction *insn) +{ + pseudo_t pseudo = insn->src; + + if (pseudo && pseudo != VOID) { + LLVMValueRef result = pseudo_to_value(fn, insn, pseudo); + + LLVMBuildRet(fn->builder, result); + } else + LLVMBuildRetVoid(fn->builder); +} + +static void output_op_load(struct function *fn, struct instruction *insn) +{ + LLVMTypeRef int_type; + LLVMValueRef src_p, src_i, ofs_i, addr_i, addr, target; + + /* int type large enough to hold a pointer */ + int_type = LLVMIntType(bits_in_pointer); + + /* convert to integer, add src + offset */ + src_p = pseudo_to_value(fn, insn, insn->src); + src_i = LLVMBuildPtrToInt(fn->builder, src_p, int_type, "src_i"); + + ofs_i = LLVMConstInt(int_type, insn->offset, 0); + addr_i = LLVMBuildAdd(fn->builder, src_i, ofs_i, "addr_i"); + + /* convert address back to pointer */ + addr = LLVMBuildIntToPtr(fn->builder, addr_i, + LLVMTypeOf(src_p), "addr"); + + /* perform load */ + target = LLVMBuildLoad(fn->builder, addr, "load_target"); + + insn->target->priv = target; +} + +static void output_op_store(struct function *fn, struct instruction *insn) +{ + LLVMTypeRef int_type; + LLVMValueRef src_p, src_i, ofs_i, addr_i, addr, target, target_in; + + /* int type large enough to hold a pointer */ + int_type = LLVMIntType(bits_in_pointer); + + /* convert to integer, add src + offset */ + src_p = pseudo_to_value(fn, insn, insn->src); + src_i = LLVMBuildPtrToInt(fn->builder, src_p, int_type, "src_i"); + + ofs_i = LLVMConstInt(int_type, insn->offset, 0); + addr_i = 
LLVMBuildAdd(fn->builder, src_i, ofs_i, "addr_i"); + + /* convert address back to pointer */ + addr = LLVMBuildIntToPtr(fn->builder, addr_i, + LLVMPointerType(int_type, 0), "addr"); + + target_in = pseudo_to_value(fn, insn, insn->target); + + /* perform store */ + target = LLVMBuildStore(fn->builder, target_in, addr); + + insn->target->priv = target; +} + +static LLVMValueRef bool_value(struct function *fn, LLVMValueRef value) +{ + if (LLVMTypeOf(value) != LLVMInt1Type()) + value = LLVMBuildIsNotNull(fn->builder, value, "cond"); + + return value; +} + +static void output_op_br(struct function *fn, struct instruction *br) +{ + if (br->cond) { + LLVMValueRef cond = bool_value(fn, + pseudo_to_value(fn, br, br->cond)); + + LLVMBuildCondBr(fn->builder, cond, + br->bb_true->priv, + br->bb_false->priv); + } else + LLVMBuildBr(fn->builder, + br->bb_true ? br->bb_true->priv : + br->bb_false->priv); +} + +static void output_op_sel(struct function *fn, struct instruction *insn) +{ + LLVMValueRef target, src1, src2, src3; + + src1 = bool_value(fn, pseudo_to_value(fn, insn, insn->src1)); + src2 = pseudo_to_value(fn, insn, insn->src2); + src3 = pseudo_to_value(fn, insn, insn->src3); + + target = LLVMBuildSelect(fn->builder, src1, src2, src3, "select"); + + insn->target->priv = target; +} + +static void output_op_switch(struct function *fn, struct instruction *insn) +{ + LLVMValueRef sw_val, target; + struct basic_block *def = NULL; + struct multijmp *jmp; + int n_jmp = 0; + + FOR_EACH_PTR(insn->multijmp_list, jmp) { + if (jmp->begin == jmp->end) { /* case N */ + n_jmp++; + } else if (jmp->begin < jmp->end) { /* case M..N */ + assert(0); + } else /* default case */ + def = jmp->target; + } END_FOR_EACH_PTR(jmp); + + sw_val = pseudo_to_value(fn, insn, insn->target); + target = LLVMBuildSwitch(fn->builder, sw_val, + def ? 
def->priv : NULL, n_jmp); + + FOR_EACH_PTR(insn->multijmp_list, jmp) { + if (jmp->begin == jmp->end) { /* case N */ + LLVMAddCase(target, + LLVMConstInt(LLVMInt32Type(), jmp->begin, 0), + jmp->target->priv); + } else if (jmp->begin < jmp->end) { /* case M..N */ + assert(0); + } + } END_FOR_EACH_PTR(jmp); + + insn->target->priv = target; +} + +struct llfunc { + char name[256]; /* wasteful */ + LLVMValueRef func; +}; + +DECLARE_ALLOCATOR(llfunc); +DECLARE_PTR_LIST(llfunc_list, struct llfunc); +ALLOCATOR(llfunc, "llfuncs"); + +static struct local_module { + struct llfunc_list *llfunc_list; +} mi; + +static LLVMTypeRef get_func_type(struct function *fn, struct instruction *insn) +{ + struct symbol *sym = insn->func->sym; + char buffer[256]; + LLVMTypeRef func_type, ret_type; + struct pseudo *arg; + int n_arg = 0; + LLVMTypeRef *arg_type; + + if (sym->ident) + sprintf(buffer, "%.*s", sym->ident->len, sym->ident->name); + else + sprintf(buffer, "<anon sym %p>", sym); + + /* VERIFY: is this correct, for functions? */ + func_type = LLVMGetTypeByName(fn->module, buffer); + if (func_type) + return func_type; + + /* to avoid strangeness with varargs [for now], we build + * the function and type anew, for each call. This + * is probably wrong. We should look up the + * symbol declaration info. 
+ */ + + /* build return type */ + if (insn->target && insn->target != VOID) + ret_type = pseudo_type(fn, insn, insn->target); + else + ret_type = LLVMVoidType(); + + /* count args, build argument type information */ + FOR_EACH_PTR(insn->arguments, arg) { + n_arg++; + } END_FOR_EACH_PTR(arg); + + arg_type = calloc(n_arg, sizeof(LLVMTypeRef)); + + int idx = 0; + FOR_EACH_PTR(insn->arguments, arg) { + arg_type[idx++] = pseudo_type(fn, insn, arg); + } END_FOR_EACH_PTR(arg); + + func_type = LLVMFunctionType(ret_type, arg_type, n_arg, + insn->fntype->variadic); + + return func_type; +} + +static LLVMValueRef get_function(struct function *fn, struct instruction *insn) +{ + struct symbol *sym = insn->func->sym; + char buffer[256]; + LLVMValueRef func; + struct llfunc *f; + + if (sym->ident) + sprintf(buffer, "%.*s", sym->ident->len, sym->ident->name); + else + sprintf(buffer, "<anon sym %p>", sym); + + + /* search for pre-built function type definition */ + FOR_EACH_PTR(mi.llfunc_list, f) { + if (!strcmp(f->name, buffer)) + return f->func; /* found match; return */ + } END_FOR_EACH_PTR(f); + + /* build function type definition */ + LLVMTypeRef func_type = get_func_type(fn, insn); + + func = LLVMAddFunction(fn->module, buffer, func_type); + + /* store built function on list, for later referencing */ + f = calloc(1, sizeof(*f)); + strncpy(f->name, buffer, sizeof(f->name) - 1); + f->func = func; + + add_ptr_list(&mi.llfunc_list, f); + + return func; +} + +static void output_op_call(struct function *fn, struct instruction *insn) +{ + LLVMValueRef target, func; + int n_arg = 0, i; + struct pseudo *arg; + LLVMValueRef *args; + + FOR_EACH_PTR(insn->arguments, arg) { + n_arg++; + } END_FOR_EACH_PTR(arg); + + args = calloc(n_arg, sizeof(LLVMValueRef)); + + i = 0; + FOR_EACH_PTR(insn->arguments, arg) { + args[i++] = pseudo_to_value(fn, insn, arg); + } END_FOR_EACH_PTR(arg); + + func = get_function(fn, insn); + target = LLVMBuildCall(fn->builder, func, args, n_arg, ""); + + 
insn->target->priv = target;
+}
+
+/*
+ * Remember that LLVM phi node 'phi' still needs an incoming value for
+ * 'pseudo'.  The record is appended to the function-wide fn->fwd_list
+ * and is resolved later by output_phi_fwd().
+ */
+static void store_phi_fwd(struct function *fn, LLVMValueRef phi,
+			  pseudo_t pseudo)
+{
+	struct phi_fwd *fwd;
+
+	fwd = calloc(1, sizeof(*fwd));
+	if (!fwd)
+		abort();	/* out of memory */
+	fwd->phi = phi;
+	fwd->pseudo = pseudo;
+
+	/* append fwd ref to function-wide list */
+	if (!fn->fwd_list)
+		fn->fwd_list = fwd;
+	else {
+		struct phi_fwd *last = fn->fwd_list;
+
+		while (last->next)
+			last = last->next;
+		last->next = fwd;
+	}
+}
+
+/*
+ * Resolve every pending forward reference to 'pseudo': each unresolved
+ * record on fn->fwd_list that names it gets 'v' added as an incoming
+ * value of its phi node and is marked resolved.
+ */
+static void output_phi_fwd(struct function *fn, pseudo_t pseudo, LLVMValueRef v)
+{
+	struct phi_fwd *fwd = fn->fwd_list;
+
+	while (fwd) {
+		struct phi_fwd *tmp;
+
+		tmp = fwd;
+		fwd = fwd->next;
+
+		if (tmp->pseudo == pseudo && !tmp->resolved) {
+			LLVMValueRef phi_vals[1];
+			LLVMBasicBlockRef phi_blks[1];
+
+			phi_vals[0] = v;
+			phi_blks[0] = pseudo->def->bb->priv;
+
+			LLVMAddIncoming(tmp->phi, phi_vals, phi_blks, 1);
+
+			tmp->resolved = true;
+		}
+	}
+}
+
+/*
+ * OP_PHISOURCE: the target simply takes the value of insn->phi_src.
+ * Phi nodes that were emitted before this source are patched up here.
+ */
+static void output_op_phisrc(struct function *fn, struct instruction *insn)
+{
+	LLVMValueRef v;
+
+	assert(insn->target->priv == NULL);
+
+	/* target = src */
+	v = pseudo_to_value(fn, insn, insn->phi_src);
+	insn->target->priv = v;
+
+	assert(insn->target->priv != NULL);
+
+	/* resolve forward references to this phi source, if present */
+	output_phi_fwd(fn, insn->target, v);
+}
+
+/*
+ * OP_PHI: build an LLVM phi node from insn->phi_list.
+ *
+ * Sources whose value is already known are added right away.  A source
+ * without a value yet that is a PSEUDO_PHI is queued with store_phi_fwd();
+ * any other source without a value is skipped.
+ */
+static void output_op_phi(struct function *fn, struct instruction *insn)
+{
+	pseudo_t phi;
+	LLVMValueRef target;
+
+	target = LLVMBuildPhi(fn->builder, insn_symbol_type(fn->module, insn),
+				"phi");
+	int pll = 0;
+	FOR_EACH_PTR(insn->phi_list, phi) {
+		if (pseudo_to_value(fn, insn, phi))	/* skip VOID, fwd refs*/
+			pll++;
+	} END_FOR_EACH_PTR(phi);
+
+	LLVMValueRef *phi_vals = calloc(pll, sizeof(LLVMValueRef));
+	LLVMBasicBlockRef *phi_blks = calloc(pll, sizeof(LLVMBasicBlockRef));
+
+	if (pll && (!phi_vals || !phi_blks))
+		abort();	/* out of memory */
+
+	int idx = 0;
+	FOR_EACH_PTR(insn->phi_list, phi) {
+		LLVMValueRef v;
+
+		v = pseudo_to_value(fn, insn, phi);
+		if (v) {			/* skip VOID, fwd refs */
+			phi_vals[idx] = v;
+			phi_blks[idx] = phi->def->bb->priv;
+			idx++;
+		}
+		else if (phi->type == PSEUDO_PHI)	/* fwd ref */
+			store_phi_fwd(fn, target, phi);
+	} END_FOR_EACH_PTR(phi);
+
+	LLVMAddIncoming(target, phi_vals, phi_blks, pll);
+
+	/* the phi node recorded the incoming pairs; drop the scratch arrays */
+	free(phi_vals);
+	free(phi_blks);
+
+	insn->target->priv = target;
+}
+
+/* OP_PTRCAST: bit-cast the source value to the instruction's type. */
+static void output_op_ptrcast(struct function *fn, struct instruction *insn)
+{
+	LLVMValueRef src, target;
+	char target_name[64];
+
+	src = insn->src->priv;
+	if (!src)
+		src = pseudo_to_value(fn, insn, insn->src);
+
+	pseudo_name(insn->target, target_name);
+
+	assert(!symbol_is_fp_type(insn->type));
+
+	target = LLVMBuildBitCast(fn->builder, src, insn_symbol_type(fn->module, insn), target_name);
+
+	insn->target->priv = target;
+}
+
+/*
+ * OP_CAST / OP_SCAST: integer conversion to the instruction's type.
+ *
+ * 'op' is the widening opcode chosen by the caller (LLVMZExt or LLVMSExt).
+ * A narrower destination is truncated, an equally wide one is bit-cast,
+ * and only a strictly wider one uses 'op'.
+ */
+static void output_op_cast(struct function *fn, struct instruction *insn, LLVMOpcode op)
+{
+	LLVMValueRef src, target;
+	char target_name[64];
+	unsigned int width;
+
+	src = insn->src->priv;
+	if (!src)
+		src = pseudo_to_value(fn, insn, insn->src);
+
+	pseudo_name(insn->target, target_name);
+
+	assert(!symbol_is_fp_type(insn->type));
+
+	width = LLVMGetIntTypeWidth(LLVMTypeOf(src));
+
+	if (insn->size < width)
+		target = LLVMBuildTrunc(fn->builder, src, insn_symbol_type(fn->module, insn), target_name);
+	else if (insn->size == width)
+		/* zext/sext require a strictly wider destination type */
+		target = LLVMBuildBitCast(fn->builder, src, insn_symbol_type(fn->module, insn), target_name);
+	else
+		target = LLVMBuildCast(fn->builder, op, src, insn_symbol_type(fn->module, insn), target_name);
+
+	insn->target->priv = target;
+}
+
+static void output_op_copy(struct function *fn, struct instruction *insn,
+			   pseudo_t pseudo)
+{
+	LLVMValueRef src, target;
+	LLVMTypeRef const_type;
+	char target_name[64];
+
+	pseudo_name(insn->target, target_name);
+	src = pseudo_to_value(fn, insn, pseudo);
+	const_type = insn_symbol_type(fn->module, insn);
+
+	/*
+	 * This is nothing more than 'target = src'
+	 *
+	 * TODO: find a better way to provide an identity function,
+	 * than using "X + 0" simply to produce a new LLVM pseudo
+	 */
+
+	/* -0.0, not 0.0: "x + 0.0" would turn a negative zero into +0.0 */
+	if (symbol_is_fp_type(insn->type))
+		target = LLVMBuildFAdd(fn->builder, src,
+			LLVMConstReal(const_type, -0.0), target_name);
+	else
+		target = LLVMBuildAdd(fn->builder, src,
+			LLVMConstInt(const_type, 0, 0), target_name);
+
+	insn->target->priv
= target; +} + +static void output_insn(struct function *fn, struct instruction *insn) +{ + switch (insn->opcode) { + case OP_RET: + output_op_ret(fn, insn); + break; + case OP_BR: + output_op_br(fn, insn); + break; + case OP_SYMADDR: + assert(0); + break; + case OP_SETVAL: + assert(0); + break; + case OP_SWITCH: + output_op_switch(fn, insn); + break; + case OP_COMPUTEDGOTO: + assert(0); + break; + case OP_PHISOURCE: + output_op_phisrc(fn, insn); + break; + case OP_PHI: + output_op_phi(fn, insn); + break; + case OP_LOAD: + output_op_load(fn, insn); + break; + case OP_LNOP: + assert(0); + break; + case OP_STORE: + output_op_store(fn, insn); + break; + case OP_SNOP: + assert(0); + break; + case OP_INLINED_CALL: + assert(0); + break; + case OP_CALL: + output_op_call(fn, insn); + break; + case OP_CAST: + output_op_cast(fn, insn, LLVMZExt); + break; + case OP_SCAST: + output_op_cast(fn, insn, LLVMSExt); + break; + case OP_FPCAST: + assert(0); + break; + case OP_PTRCAST: + output_op_ptrcast(fn, insn); + break; + case OP_BINARY ... OP_BINARY_END: + case OP_BINCMP ... 
OP_BINCMP_END: + output_op_binary(fn, insn); + break; + case OP_SEL: + output_op_sel(fn, insn); + break; + case OP_SLICE: + assert(0); + break; + case OP_NOT: { + LLVMValueRef src, target; + char target_name[64]; + + src = pseudo_to_value(fn, insn, insn->src); + + pseudo_name(insn->target, target_name); + + target = LLVMBuildNot(fn->builder, src, target_name); + + insn->target->priv = target; + break; + } + case OP_NEG: + assert(0); + break; + case OP_CONTEXT: + assert(0); + break; + case OP_RANGE: + assert(0); + break; + case OP_NOP: + assert(0); + break; + case OP_DEATHNOTE: + assert(0); + break; + case OP_ASM: + assert(0); + break; + case OP_COPY: + output_op_copy(fn, insn, insn->src); + break; + default: + break; + } +} + +static void output_bb(struct function *fn, struct basic_block *bb, unsigned long generation) +{ + struct instruction *insn; + + bb->generation = generation; + + FOR_EACH_PTR(bb->insns, insn) { + if (!insn->bb) + continue; + + output_insn(fn, insn); + } + END_FOR_EACH_PTR(insn); +} + +#define MAX_ARGS 64 + +static void output_fn(LLVMModuleRef module, struct entrypoint *ep) +{ + unsigned long generation = ++bb_generation; + struct symbol *sym = ep->name; + struct symbol *base_type = sym->ctype.base_type; + struct symbol *ret_type = sym->ctype.base_type->ctype.base_type; + LLVMTypeRef arg_types[MAX_ARGS]; + LLVMTypeRef return_type; + struct function function = { .module = module }; + struct basic_block *bb; + struct symbol *arg; + const char *name; + int nr_args = 0; + struct llfunc *f; + + FOR_EACH_PTR(base_type->arguments, arg) { + struct symbol *arg_base_type = arg->ctype.base_type; + + arg_types[nr_args++] = symbol_type(module, arg_base_type); + } END_FOR_EACH_PTR(arg); + + name = show_ident(sym->ident); + + return_type = symbol_type(module, ret_type); + + function.type = LLVMFunctionType(return_type, arg_types, nr_args, 0); + + function.fn = LLVMAddFunction(module, name, function.type); + LLVMSetFunctionCallConv(function.fn, LLVMCCallConv); 
+ + LLVMSetLinkage(function.fn, function_linkage(sym)); + + /* store built function on list, for later referencing */ + f = calloc(1, sizeof(*f)); + strncpy(f->name, name, sizeof(f->name) - 1); + f->func = function.fn; + + add_ptr_list(&mi.llfunc_list, f); + + function.builder = LLVMCreateBuilder(); + + static int nr_bb; + + FOR_EACH_PTR(ep->bbs, bb) { + if (bb->generation == generation) + continue; + + LLVMBasicBlockRef bbr; + char bbname[32]; + + sprintf(bbname, "L%d", nr_bb++); + bbr = LLVMAppendBasicBlock(function.fn, bbname); + + bb->priv = bbr; + } + END_FOR_EACH_PTR(bb); + + FOR_EACH_PTR(ep->bbs, bb) { + if (bb->generation == generation) + continue; + + LLVMPositionBuilderAtEnd(function.builder, bb->priv); + + output_bb(&function, bb, generation); + } + END_FOR_EACH_PTR(bb); +} + +static LLVMValueRef output_data(LLVMModuleRef module, struct symbol *sym) +{ + struct expression *initializer = sym->initializer; + LLVMValueRef initial_value; + LLVMValueRef data; + const char *name; + + if (initializer) { + switch (initializer->type) { + case EXPR_VALUE: + initial_value = LLVMConstInt(symbol_type(module, sym), initializer->value, 1); + break; + case EXPR_SYMBOL: { + struct symbol *sym = initializer->symbol; + + initial_value = LLVMGetNamedGlobal(module, show_ident(sym->ident)); + if (!initial_value) + initial_value = output_data(module, sym); + break; + } + case EXPR_STRING: { + const char *s = initializer->string->data; + + initial_value = LLVMConstString(strdup(s), strlen(s) + 1, true); + break; + } + default: + assert(0); + } + } else { + LLVMTypeRef type = symbol_type(module, sym); + + initial_value = LLVMConstNull(type); + } + + name = show_ident(sym->ident); + + data = LLVMAddGlobal(module, LLVMTypeOf(initial_value), name); + + LLVMSetLinkage(data, data_linkage(sym)); + + if (!(sym->ctype.modifiers & MOD_EXTERN)) + LLVMSetInitializer(data, initial_value); + + return data; +} + +static int compile(LLVMModuleRef module, struct symbol_list *list) +{ + struct 
symbol *sym; + + FOR_EACH_PTR(list, sym) { + struct entrypoint *ep; + expand_symbol(sym); + ep = linearize_symbol(sym); + if (ep) + output_fn(module, ep); + else + output_data(module, sym); + } + END_FOR_EACH_PTR(sym); + + return 0; +} + +int main(int argc, char **argv) +{ + struct string_list * filelist = NULL; + char *file; + + LLVMModuleRef module = LLVMModuleCreateWithName("sparse"); + + compile(module, sparse_initialize(argc, argv, &filelist)); + + FOR_EACH_PTR_NOTAG(filelist, file) { + compile(module, sparse(file)); + } END_FOR_EACH_PTR_NOTAG(file); + + LLVMVerifyModule(module, LLVMPrintMessageAction, NULL); + + LLVMWriteBitcodeToFD(module, STDOUT_FILENO, 0, 0); + + LLVMDisposeModule(module); + + return 0; +} @@ -53,7 +53,19 @@ arithmetic operations other than bitwise operations, and on any conversion of one restricted type into another, except via a cast that includes \fB__attribute__((force))\fR. -Sparse does not issue these warnings by default. +__bitwise ends up being a "stronger integer separation". That one +doesn't allow you to mix with non-bitwise integers, so now it's much +harder to lose the type by mistake. + +__bitwise is for *unique types* that cannot be mixed with other +types, and that you'd never want to just use as a random integer (the +integer 0 is special, though, and gets silently accepted iirc - it's +kind of like "NULL" for pointers). So "gfp_t" or the "safe endianness" +types would be __bitwise: you can only operate on them by doing +specific operations that know about *that* particular type. + +Generally, you want bitwise if you are looking for type safety. Sparse +does not issue these warnings by default. . 
.TP .B \-Wcast\-to\-as diff --git a/sparsec b/sparsec new file mode 100755 index 00000000..9c90b305 --- /dev/null +++ b/sparsec @@ -0,0 +1,52 @@ +#!/bin/sh +# +# GCC compatible C compiler based on Sparse LLVM + +set +e + +SPARSEOPTS="" +DIRNAME=`dirname $0` + +NEED_LINK=1 + +if [ $# -eq 0 ]; then + echo "`basename $0`: no input files" + exit 1 +fi + +while [ $# -gt 0 ]; do + case $1 in + '-o') + OUTFILE=$2 + shift + ;; + '-c') + NEED_LINK=0 + ;; + *) + SPARSEOPTS="$SPARSEOPTS $1 " ;; + esac + shift +done + +TMPLLVM=`mktemp -t tmp.XXXXXX`".llvm" +TMPFILE=`mktemp -t tmp.XXXXXX`".o" + +$DIRNAME/sparse-llvm $SPARSEOPTS > $TMPLLVM + +llc -o - $TMPLLVM | as -o $TMPFILE + +if [ $NEED_LINK -eq 1 ]; then + if [ -z $OUTFILE ]; then + OUTFILE=a.out + fi + gcc $TMPFILE -o $OUTFILE +else + if [ -z $OUTFILE ]; then + echo "`basename $0`: no output file" + exit 1 + fi + mv $TMPFILE $OUTFILE +fi + +rm -f $TMPLLVM @@ -55,7 +55,7 @@ static void replace_phi_node(struct instruction *phi) track_phi_uses(phi); phi->opcode = OP_COPY; - phi->src = tmp; + use_pseudo(phi, tmp, &phi->src); // FIXME: free phi->phi_list; } diff --git a/validation/backend/arithmetic-ops.c b/validation/backend/arithmetic-ops.c new file mode 100644 index 00000000..7c299d03 --- /dev/null +++ b/validation/backend/arithmetic-ops.c @@ -0,0 +1,94 @@ +static int add(int x, int y) +{ + return x + y; +} + +static unsigned int uadd(unsigned int x, unsigned int y) +{ + return x + y; +} + +static float fadd(float x, float y) +{ + return x + y; +} + +static double dadd(double x, double y) +{ + return x + y; +} + +static int sub(int x, int y) +{ + return x - y; +} + +static unsigned int usub(unsigned int x, unsigned int y) +{ + return x - y; +} + +static float fsub(float x, float y) +{ + return x - y; +} + +static double dsub(double x, double y) +{ + return x - y; +} + +static int mul(int x, int y) +{ + return x * y; +} + +static unsigned int umul(unsigned int x, unsigned int y) +{ + return x * y; +} + +static float 
fmul(float x, float y) +{ + return x * y; +} + +static double dmul(double x, double y) +{ + return x * y; +} + +static int div(int x, int y) +{ + return x / y; +} + +static unsigned int udiv(unsigned int x, unsigned int y) +{ + return x / y; +} + +static float fdiv(float x, float y) +{ + return x / y; +} + +static double ddiv(double x, double y) +{ + return x / y; +} + +static int mod(int x, int y) +{ + return x % y; +} + +static unsigned int umod(unsigned int x, unsigned int y) +{ + return x % y; +} + +/* + * check-name: Arithmetic operator code generation + * check-command: ./sparsec -c $file -o tmp.o + */ diff --git a/validation/backend/array.c b/validation/backend/array.c new file mode 100644 index 00000000..bd3ec596 --- /dev/null +++ b/validation/backend/array.c @@ -0,0 +1,6 @@ +static char array[128]; + +/* + * check-name: Array code generation + * check-command: ./sparsec -c $file -o tmp.o + */ diff --git a/validation/backend/bitwise-ops.c b/validation/backend/bitwise-ops.c new file mode 100644 index 00000000..659c7639 --- /dev/null +++ b/validation/backend/bitwise-ops.c @@ -0,0 +1,64 @@ +static int shl(int x, int y) +{ + return x << y; +} + +static unsigned int ushl(unsigned int x, unsigned int y) +{ + return x << y; +} + +static int shr(int x, int y) +{ + return x >> y; +} + +static unsigned int ushr(unsigned int x, unsigned int y) +{ + return x >> y; +} + +static int and(int x, int y) +{ + return x & y; +} + +static unsigned int uand(unsigned int x, unsigned int y) +{ + return x & y; +} + +static int or(int x, int y) +{ + return x | y; +} + +static unsigned int uor(unsigned int x, unsigned int y) +{ + return x | y; +} + +static int xor(int x, int y) +{ + return x ^ y; +} + +static unsigned int uxor(unsigned int x, unsigned int y) +{ + return x ^ y; +} + +static int not(int x) +{ + return ~x; +} + +static unsigned int unot(unsigned int x) +{ + return ~x; +} + +/* + * check-name: Bitwise operator code generation + * check-command: ./sparsec -c $file -o 
tmp.o + */ diff --git a/validation/backend/bool-test.c b/validation/backend/bool-test.c new file mode 100644 index 00000000..a6f33a1a --- /dev/null +++ b/validation/backend/bool-test.c @@ -0,0 +1,9 @@ +static _Bool return_false(void) +{ + return 0; +} + +/* + * check-name: Boolean type code generation + * check-command: ./sparsec -c $file -o tmp.o + */ diff --git a/validation/backend/cast.c b/validation/backend/cast.c new file mode 100644 index 00000000..598b16aa --- /dev/null +++ b/validation/backend/cast.c @@ -0,0 +1,50 @@ +typedef _Bool bool; +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; +typedef long long longlong; +typedef unsigned long long ulonglong; + +#define DEFINE_CAST(from, to) \ + static to from##2##to(from x) { \ + return x; \ + } + +#define DEFINE_CASTS(from) \ + DEFINE_CAST(from, bool) \ + DEFINE_CAST(from, char) \ + DEFINE_CAST(from, uchar) \ + DEFINE_CAST(from, short) \ + DEFINE_CAST(from, ushort) \ + DEFINE_CAST(from, int) \ + DEFINE_CAST(from, uint) \ + DEFINE_CAST(from, long) \ + DEFINE_CAST(from, ulong) \ + DEFINE_CAST(from, longlong) \ + DEFINE_CAST(from, ulonglong) \ +/* + DEFINE_CAST(from, float) \ + DEFINE_CAST(from, double) +*/ + +DEFINE_CASTS(bool) +DEFINE_CASTS(char) +DEFINE_CASTS(uchar) +DEFINE_CASTS(short) +DEFINE_CASTS(ushort) +DEFINE_CASTS(int) +DEFINE_CASTS(uint) +DEFINE_CASTS(long) +DEFINE_CASTS(ulong) +DEFINE_CASTS(longlong) +DEFINE_CASTS(ulonglong) +/* +DEFINE_CASTS(float) +DEFINE_CASTS(double) +*/ + +/* + * check-name: Cast code generation + * check-command: ./sparsec -c $file -o tmp.o + */ diff --git a/validation/backend/cmp-ops.c b/validation/backend/cmp-ops.c new file mode 100644 index 00000000..a5f736d7 --- /dev/null +++ b/validation/backend/cmp-ops.c @@ -0,0 +1,84 @@ +static int sete(int x, int y) +{ + return x == y; +} + +static int setne(int x, int y) +{ + return x != y; +} + +static int setl(int x, int y) +{ + return x < y; +} + +static int 
setg(int x, int y) +{ + return x > y; +} + +static int setle(int x, int y) +{ + return x <= y; +} + +static int setge(int x, int y) +{ + return x >= y; +} + +static int setb(unsigned int x, unsigned int y) +{ + return x < y; +} + +static int seta(unsigned int x, unsigned int y) +{ + return x > y; +} + +static int setbe(unsigned int x, unsigned int y) +{ + return x <= y; +} + +static int setae(unsigned int x, unsigned int y) +{ + return x >= y; +} + +static int setfe(float x, float y) +{ + return x == y; +} + +static int setfne(float x, float y) +{ + return x != y; +} + +static int setfl(float x, float y) +{ + return x < y; +} + +static int setfg(float x, float y) +{ + return x > y; +} + +static int setfle(float x, float y) +{ + return x <= y; +} + +static int setfge(float x, float y) +{ + return x >= y; +} + +/* + * check-name: Comparison operator code generation + * check-command: ./sparsec -c $file -o tmp.o + */ diff --git a/validation/backend/extern.c b/validation/backend/extern.c new file mode 100644 index 00000000..24cbae55 --- /dev/null +++ b/validation/backend/extern.c @@ -0,0 +1,11 @@ +extern unsigned long foo; + +static unsigned long bar(void) +{ + return foo; +} + +/* + * check-name: Extern symbol code generation + * check-command: ./sparsec -c $file -o tmp.o + */ diff --git a/validation/backend/function-ptr.c b/validation/backend/function-ptr.c new file mode 100644 index 00000000..fc022b3c --- /dev/null +++ b/validation/backend/function-ptr.c @@ -0,0 +1,11 @@ +typedef int (*fn_t)(int x, int y); + +static int run(fn_t fn, int x, int y) +{ + return fn(x, y); +} + +/* + * check-name: Function pointer code generation + * check-command: ./sparsec -c $file -o tmp.o + */ diff --git a/validation/backend/hello.c b/validation/backend/hello.c new file mode 100644 index 00000000..79905004 --- /dev/null +++ b/validation/backend/hello.c @@ -0,0 +1,13 @@ +#include <stdio.h> + +int main(int argc, char *argv[]) +{ + puts("hello, world"); + + return 0; +} + +/* + * 
check-name: 'hello, world' code generation + * check-command: ./sparsec -c $file -o tmp.o + */ diff --git a/validation/backend/int-cond.c b/validation/backend/int-cond.c new file mode 100644 index 00000000..48b25a77 --- /dev/null +++ b/validation/backend/int-cond.c @@ -0,0 +1,30 @@ +static long foo(long a, long b, long c) +{ + return a? b:c; +} + +static long foo_bool(_Bool a, long b, long c) +{ + return a? b:c; +} + +static long bar(long a, long b, long c) +{ + if (a) + return b; + else + return b + c; +} + +static long bar_bool(_Bool a, long b, long c) +{ + if (a) + return b; + else + return b + c; +} + +/* + * check-name: Non-bool condition values in branch/select + * check-command: ./sparsec -c $file -o tmp.o + */ diff --git a/validation/backend/load-type.c b/validation/backend/load-type.c new file mode 100644 index 00000000..80416cad --- /dev/null +++ b/validation/backend/load-type.c @@ -0,0 +1,12 @@ +extern struct _IO_FILE *stdin; + +static void sub(struct _IO_FILE *in) {} + +static void test(void) { + sub(stdin); +} + +/* + * check-name: Type of loaded objects + * check-command: ./sparsec -c $file -o tmp.o + */ diff --git a/validation/backend/logical-ops.c b/validation/backend/logical-ops.c new file mode 100644 index 00000000..8b2a6a85 --- /dev/null +++ b/validation/backend/logical-ops.c @@ -0,0 +1,24 @@ +static int and_bool(int x, int y) +{ + return x && y; +} + +static unsigned int uand_bool(unsigned int x, unsigned int y) +{ + return x && y; +} + +static int or_bool(int x, int y) +{ + return x || y; +} + +static unsigned int uor_bool(unsigned int x, unsigned int y) +{ + return x || y; +} + +/* + * check-name: Logical operator code generation + * check-command: ./sparsec -c $file -o tmp.o + */ diff --git a/validation/backend/loop.c b/validation/backend/loop.c new file mode 100644 index 00000000..31054f52 --- /dev/null +++ b/validation/backend/loop.c @@ -0,0 +1,21 @@ + +extern int bar (int); + +extern int foo (int); + +int foo (int x) +{ + int y = 0; + + 
while (y < 1000) { + y += bar(x); + } + + return y; +} + + +/* + * check-name: Loops + * check-command: ./sparsec -c $file -o tmp.o + */ diff --git a/validation/backend/ptrcast.c b/validation/backend/ptrcast.c new file mode 100644 index 00000000..46f8add8 --- /dev/null +++ b/validation/backend/ptrcast.c @@ -0,0 +1,9 @@ +static char *ptrcast(unsigned long *x) +{ + return (unsigned char *) x; +} + +/* + * check-name: Pointer cast code generation + * check-command: ./sparsec -c $file -o tmp.o + */ diff --git a/validation/backend/struct.c b/validation/backend/struct.c new file mode 100644 index 00000000..1afaf2db --- /dev/null +++ b/validation/backend/struct.c @@ -0,0 +1,19 @@ +struct ctype { + int type; +}; + +struct symbol { + void *p; + const char *name; + struct ctype ctype; + struct symbol *next_id; +}; + +static struct symbol sym; +static struct symbol *sym_p; +static struct symbol *sym_q = &sym; + +/* + * check-name: Struct code generation + * check-command: ./sparsec -c $file -o tmp.o + */ diff --git a/validation/backend/union.c b/validation/backend/union.c new file mode 100644 index 00000000..e155f6ad --- /dev/null +++ b/validation/backend/union.c @@ -0,0 +1,12 @@ +union foo { + unsigned long x; + unsigned char y; + char buf[128]; +}; + +static union foo foo; + +/* + * check-name: Union code generation + * check-command: ./sparsec -c $file -o tmp.o + */ diff --git a/validation/backend/void-return-type.c b/validation/backend/void-return-type.c new file mode 100644 index 00000000..b282fdee --- /dev/null +++ b/validation/backend/void-return-type.c @@ -0,0 +1,13 @@ +static void foo(void) +{ +} + +static void *bar(void *p) +{ + return p; +} + +/* + * check-name: void return type code generation + * check-command: ./sparsec -c $file -o tmp.o + */ diff --git a/validation/goto-label.c b/validation/goto-label.c new file mode 100644 index 00000000..1196fdef --- /dev/null +++ b/validation/goto-label.c @@ -0,0 +1,29 @@ +void foo(void) +{ + goto a; +a: +a: + return; +} + 
+void g(void) +{ + goto a; +a: + return; +} + +void bar(void) +{ + goto neverland; +} + +/* + * check-name: goto labels + * + * check-error-start +goto-label.c:5:1: error: label 'a' redefined +goto-label.c:18:9: error: label 'neverland' was not declared + * check-error-end + */ + |
