aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
authorRob Taylor <rob.taylor@codethink.co.uk>2007-07-02 13:27:46 +0100
committerJosh Triplett <josh@freedesktop.org>2007-07-13 23:22:46 -0700
commitf744a1419d9264eaa3d8c8b9145488429b19d4a1 (patch)
tree80b812e9e07adbb1b3ad727ce8f10bee68942a46
parent29753cad1490a9ddaaac3de6c541cd973608ed09 (diff)
downloadsparse-dev-f744a1419d9264eaa3d8c8b9145488429b19d4a1.tar.gz
add c2xml program
Adds new c2xml program which dumps out the parse tree for a given file as well-formed xml. A DTD for the format is included as parse.dtd. Signed-off-by: Rob Taylor <rob.taylor@codethink.co.uk> [Josh: DTD fixes] Signed-off-by: Josh Triplett <josh@freedesktop.org>
-rw-r--r--Makefile15
-rw-r--r--c2xml.c324
-rw-r--r--parse.dtd51
3 files changed, 390 insertions, 0 deletions
diff --git a/Makefile b/Makefile
index 5aa5cd63..cedf0925 100644
--- a/Makefile
+++ b/Makefile
@@ -7,6 +7,8 @@ CFLAGS=-O -g -Wall -Wwrite-strings -fpic
LDFLAGS=-g
AR=ar
+# 'yes' when pkg-config can find libxml-2.0, empty otherwise.
+# Simply expanded (:=) so pkg-config runs once instead of on every reference.
+HAVE_LIBXML:=$(shell pkg-config --exists libxml-2.0 && echo 'yes')
+
#
# For debugging, uncomment the next one
#
@@ -21,8 +23,15 @@ PKGCONFIGDIR=$(LIBDIR)/pkgconfig
PROGRAMS=test-lexing test-parsing obfuscate compile graph sparse test-linearize example \
test-unssa test-dissect ctags
+
+
INST_PROGRAMS=sparse cgcc
+# c2xml is added to the build and install lists only when HAVE_LIBXML is 'yes'.
+ifeq ($(HAVE_LIBXML),yes)
+PROGRAMS+=c2xml
+INST_PROGRAMS+=c2xml
+endif
+
LIB_H= token.h parse.h lib.h symbol.h scope.h expression.h target.h \
linearize.h bitmap.h ident-list.h compat.h flow.h allocate.h \
storage.h ptrlist.h dissect.h
@@ -107,6 +116,12 @@ test-dissect: test-dissect.o $(LIBS)
ctags: ctags.o $(LIBS)
$(QUIET_LINK)$(CC) $(LDFLAGS) -o $@ $< $(LIBS)
+ifeq ($(HAVE_LIBXML),yes)
+# The libxml2 link flags come last: linkers that resolve symbols strictly
+# left to right (for example with --as-needed) drop a library that is named
+# before the source file that uses it.
+c2xml: c2xml.c $(LIBS) $(LIB_H)
+	$(CC) $(LDFLAGS) `pkg-config --cflags libxml-2.0` -o $@ $< $(LIBS) `pkg-config --libs libxml-2.0`
+
+endif
+
$(LIB_FILE): $(LIB_OBJS)
$(QUIET_AR)$(AR) rcs $@ $(LIB_OBJS)
diff --git a/c2xml.c b/c2xml.c
new file mode 100644
index 00000000..62417982
--- /dev/null
+++ b/c2xml.c
@@ -0,0 +1,324 @@
+/*
+ * Sparse c2xml
+ *
+ * Dumps the parse tree as an xml document
+ *
+ * Copyright (C) 2007 Rob Taylor
+ *
+ * Licensed under the Open Software License version 1.1
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+
+#include "parse.h"
+#include "scope.h"
+#include "symbol.h"
+
+/* Output document state shared by the functions in this file. */
+xmlDocPtr doc = NULL; /* the XML document being built; created and freed in main() */
+xmlNodePtr root_node = NULL;/* the <parse> root element; top-level symbols are attached here */
+xmlDtdPtr dtd = NULL; /* DTD pointer; only referenced by the commented-out code in main() */
+xmlNsPtr ns = NULL; /* namespace pointer; only referenced by the commented-out code in main() */
+int idcount = 0; /* next symbol id; new_sym_node() writes it as "_<n>" and then increments it */
+
+/* NOTE(review): taglist is not referenced anywhere else in the visible code. */
+static struct symbol_list *taglist = NULL;
+
+static void examine_symbol(struct symbol *sym, xmlNodePtr node);
+
+/*
+ * Add an attribute called `name` to `node` whose value is `value` written
+ * as a signed decimal number.  Returns the result of xmlNewProp().
+ */
+static xmlAttrPtr newNumProp(xmlNodePtr node, const xmlChar * name, int value)
+{
+	char text[256];
+
+	snprintf(text, sizeof text, "%d", value);
+	return xmlNewProp(node, name, (const xmlChar *)text);
+}
+
+/*
+ * Add an attribute called `name` to `node` holding a symbol id: an
+ * underscore followed by `id` in decimal.  Returns the result of
+ * xmlNewProp().
+ */
+static xmlAttrPtr newIdProp(xmlNodePtr node, const xmlChar * name, unsigned int id)
+{
+	char buf[256];
+
+	/* id is unsigned, so it must be formatted with %u rather than %d. */
+	snprintf(buf, sizeof buf, "_%u", id);
+	return xmlNewProp(node, name, (const xmlChar *)buf);
+}
+
+/*
+ * Create a <symbol> element for `sym` as a child of `parent` and return it.
+ *
+ * `name` is written as the "type" attribute.  The element also gets an "id"
+ * built from the running idcount, the identifier when the symbol has one,
+ * and its source position ("file", "start-line", "start-col", plus the end
+ * position when sym->endpos.type is set).  The new node is stored in
+ * sym->aux; examine_symbol() relies on that to recognise visited symbols
+ * and to read back the id for "base-type" references.
+ */
+static xmlNodePtr new_sym_node(struct symbol *sym, const char *name, xmlNodePtr parent)
+{
+	xmlNodePtr node;
+	const char *ident;
+
+	/* Check the arguments before anything dereferences them. */
+	assert(name != NULL);
+	assert(sym != NULL);
+	assert(parent != NULL);
+
+	ident = show_ident(sym->ident);
+
+	node = xmlNewChild(parent, NULL, "symbol", NULL);
+
+	xmlNewProp(node, "type", name);
+
+	newIdProp(node, "id", idcount);
+
+	if (sym->ident && ident)
+		xmlNewProp(node, "ident", ident);
+	xmlNewProp(node, "file", stream_name(sym->pos.stream));
+
+	newNumProp(node, "start-line", sym->pos.line);
+	newNumProp(node, "start-col", sym->pos.pos);
+
+	if (sym->endpos.type) {
+		newNumProp(node, "end-line", sym->endpos.line);
+		newNumProp(node, "end-col", sym->endpos.pos);
+		/* "end-file" is only written when the symbol ends in another stream. */
+		if (sym->pos.stream != sym->endpos.stream)
+			xmlNewProp(node, "end-file", stream_name(sym->endpos.stream));
+	}
+	sym->aux = node;
+
+	idcount++;
+
+	return node;
+}
+
+/*
+ * Run examine_symbol() on every entry of `list`, attaching the resulting
+ * elements under `node`.
+ */
+static inline void examine_members(struct symbol_list *list, xmlNodePtr node)
+{
+	struct symbol *sym;
+
+	FOR_EACH_PTR(list, sym) {
+		examine_symbol(sym, node);
+	} END_FOR_EACH_PTR(sym);
+}
+
+/*
+ * Write one attribute with value "1" on `node` for every named modifier bit
+ * set in sym->ctype.modifiers.  Symbols outside NS_SYMBOL are left alone.
+ */
+static void examine_modifiers(struct symbol *sym, xmlNodePtr node)
+{
+	/*
+	 * Attribute name for each bit position, lowest bit first.  A NULL
+	 * entry means the bit has no attribute and is skipped.
+	 */
+	static const char *const modifiers[] = {
+		"auto",
+		"register",
+		"static",
+		"extern",
+		"const",
+		"volatile",
+		"signed",
+		"unsigned",
+		"char",
+		"short",
+		"long",
+		"long-long",
+		"typedef",
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		"inline",
+		"addressable",
+		"nocast",
+		"noderef",
+		"accessed",
+		"toplevel",
+		"label",
+		"assigned",
+		"type-type",
+		"safe",
+		"user-type",
+		"force",
+		"explicitly-signed",
+		"bitwise"};
+
+	int i;
+
+	if (sym->namespace != NS_SYMBOL)
+		return;
+
+	/*
+	 * Iterate over the 32 bit bitfield.  The mask is built from an
+	 * unsigned constant because 1 << 31 overflows a 32-bit signed int.
+	 */
+	for (i = 0; i < 32; i++) {
+		if ((sym->ctype.modifiers & (1UL << i)) && modifiers[i])
+			xmlNewProp(node, modifiers[i], "1");
+	}
+}
+
+/*
+ * Write the layout attributes of `sym` onto `node`: "bit-size",
+ * "alignment" and "offset", plus "bit-offset" for bitfield types.
+ * examine_symbol_type() is called first, before those fields are read.
+ */
+static void
+examine_layout(struct symbol *sym, xmlNodePtr node)
+{
+	examine_symbol_type(sym);
+
+	newNumProp(node, "bit-size", sym->bit_size);
+	newNumProp(node, "alignment", sym->ctype.alignment);
+	newNumProp(node, "offset", sym->offset);
+	if (is_bitfield_type(sym)) {
+		newNumProp(node, "bit-offset", sym->bit_offset);
+	}
+}
+
+/*
+ * Emit a <symbol> element for `sym` under `node`, together with its
+ * modifiers, layout, base type and, for structs, unions and functions,
+ * its members or arguments.
+ *
+ * Nothing is emitted for a NULL symbol, for a symbol that already has a
+ * node in sym->aux, or for a symbol whose identifier is reserved.
+ */
+static void examine_symbol(struct symbol *sym, xmlNodePtr node)
+{
+	xmlNodePtr child;
+	const char *base;
+	int array_size;
+
+	if (!sym)
+		return;
+	if (sym->aux)		/* already visited */
+		return;
+
+	if (sym->ident && sym->ident->reserved)
+		return;
+
+	child = new_sym_node(sym, get_type_name(sym->type), node);
+	examine_modifiers(sym, child);
+	examine_layout(sym, child);
+
+	if (sym->ctype.base_type) {
+		base = builtin_typename(sym->ctype.base_type);
+		if (base == NULL) {
+			xmlChar *base_id;
+
+			/* Non-builtin base types become top-level symbols, referenced by id. */
+			if (!sym->ctype.base_type->aux) {
+				examine_symbol(sym->ctype.base_type, root_node);
+			}
+			/*
+			 * xmlGetProp() hands back a copy owned by the caller, so
+			 * release it once xmlNewProp() has used the value.
+			 */
+			base_id = xmlGetProp((xmlNodePtr)sym->ctype.base_type->aux, "id");
+			xmlNewProp(child, "base-type", base_id);
+			xmlFree(base_id);
+		} else {
+			xmlNewProp(child, "base-type-builtin", base);
+		}
+	}
+	if (sym->array_size) {
+		/* TODO: modify get_expression_value to give error return */
+		array_size = get_expression_value(sym->array_size);
+		newNumProp(child, "array-size", array_size);
+	}
+
+	switch (sym->type) {
+	case SYM_STRUCT:
+	case SYM_UNION:
+		examine_members(sym->symbol_list, child);
+		break;
+	case SYM_FN:
+		examine_members(sym->arguments, child);
+		break;
+	case SYM_UNINITIALIZED:
+		xmlNewProp(child, "base-type-builtin", builtin_typename(sym));
+		break;
+	default:
+		/* Other symbol types have nothing further to emit. */
+		break;
+	}
+}
+
+/*
+ * Walk the token list starting at `token` until eof_token() reports the
+ * end, remembering the two most recently visited tokens.  Returns the
+ * position of the older of the two (the token before the last one
+ * visited), or NULL when fewer than two tokens were visited.
+ */
+static struct position *get_expansion_end (struct token *token)
+{
+	struct token *last = NULL;
+	struct token *before_last = NULL;
+
+	while (!eof_token(token)) {
+		before_last = last;
+		last = token;
+		token = token->next;
+	}
+
+	if (!before_last)
+		return NULL;
+	return &(before_last->pos);
+}
+
+/*
+ * Emit a <symbol type="macro"> element for `sym` under `node`.
+ *
+ * sym->endpos is filled in first from get_expansion_end(); when that
+ * returns NULL the end position is set to the start position.
+ */
+static void examine_macro(struct symbol *sym, xmlNodePtr node)
+{
+	struct position *pos;
+
+	/* this should probably go in the main codebase*/
+	pos = get_expansion_end(sym->expansion);
+	if (pos)
+		sym->endpos = *pos;
+	else
+		sym->endpos = sym->pos;
+
+	new_sym_node(sym, "macro", node);
+}
+
+/*
+ * Dispatch `sym` on its namespace: macros go to examine_macro(), typedef,
+ * struct and ordinary symbols go to examine_symbol(), both under
+ * root_node.  The remaining known namespaces are ignored and an unknown
+ * one is reported through die().  Symbols with a reserved identifier are
+ * skipped.
+ */
+static void examine_namespace(struct symbol *sym)
+{
+	if (sym->ident && sym->ident->reserved)
+		return;
+
+	switch(sym->namespace) {
+	case NS_MACRO:
+		examine_macro(sym, root_node);
+		break;
+	case NS_TYPEDEF:
+	case NS_STRUCT:
+	case NS_SYMBOL:
+		examine_symbol(sym, root_node);
+		break;
+	case NS_NONE:
+	case NS_LABEL:
+	case NS_ITERATOR:
+	case NS_UNDEF:
+	case NS_PREPROCESSOR:
+	case NS_KEYWORD:
+		break;
+	default:
+		die("Unrecognised namespace type %d",sym->namespace);
+	}
+
+}
+
+/*
+ * Return the index of the first input stream whose stream_name() equals
+ * `name`, or -1 when none of the input_stream_nr streams matches.
+ */
+static int get_stream_id (const char *name)
+{
+	int id = 0;
+
+	while (id < input_stream_nr) {
+		if (!strcmp(name, stream_name(id)))
+			return id;
+		id++;
+	}
+	return -1;
+}
+
+/*
+ * Pass every symbol in `list` whose position belongs to the stream named
+ * `file` on to examine_namespace().  A NULL list is a no-op.
+ */
+static inline void examine_symbol_list(const char *file, struct symbol_list *list)
+{
+	struct symbol *entry;
+	int wanted_stream = get_stream_id (file);
+
+	if (list == NULL)
+		return;
+
+	FOR_EACH_PTR(list, entry) {
+		if (entry->pos.stream == wanted_stream) {
+			examine_namespace(entry);
+		}
+	} END_FOR_EACH_PTR(entry);
+}
+
+/*
+ * Build an XML document with a <parse> root, fill it with the symbols of
+ * every file returned by sparse_initialize(), and write it to "-" encoded
+ * as UTF-8.
+ *
+ * Returns 0 on success and 1 when the document could not be written.
+ */
+int main(int argc, char **argv)
+{
+	struct string_list *filelist = NULL;
+	struct symbol_list *symlist = NULL;
+	char *file;
+	int written;
+
+	doc = xmlNewDoc("1.0");
+	root_node = xmlNewNode(NULL, "parse");
+	xmlDocSetRootElement(doc, root_node);
+
+/* - A DTD is probably unnecessary for something like this
+
+ dtd = xmlCreateIntSubset(doc, "parse", "http://www.kernel.org/pub/software/devel/sparse/parse.dtd" NULL, "parse.dtd");
+
+ ns = xmlNewNs (root_node, "http://www.kernel.org/pub/software/devel/sparse/parse.dtd", NULL);
+
+ xmlSetNs(root_node, ns);
+*/
+	symlist = sparse_initialize(argc, argv, &filelist);
+
+	FOR_EACH_PTR_NOTAG(filelist, file) {
+		examine_symbol_list(file, symlist);
+		sparse_keep_tokens(file);
+		examine_symbol_list(file, file_scope->symbols);
+		examine_symbol_list(file, global_scope->symbols);
+	} END_FOR_EACH_PTR_NOTAG(file);
+
+
+	/* A negative result from xmlSaveFormatFileEnc() means the write failed. */
+	written = xmlSaveFormatFileEnc("-", doc, "UTF-8", 1);
+	xmlFreeDoc(doc);
+	xmlCleanupParser();
+
+	if (written < 0) {
+		fprintf(stderr, "c2xml: failed to write XML output\n");
+		return 1;
+	}
+
+	return 0;
+}
+
diff --git a/parse.dtd b/parse.dtd
new file mode 100644
index 00000000..cbf95ec1
--- /dev/null
+++ b/parse.dtd
@@ -0,0 +1,51 @@
+<!-- c2xml writes an empty parse element when no symbol is selected. -->
+<!ELEMENT parse (symbol*) >
+
+<!ELEMENT symbol (symbol*) >
+
+<!--
+  type: includes "macro", which c2xml writes for preprocessor macros.
+  base-type-builtin: declared CDATA because its values (for example
+  "unsigned long" or "incomplete type") contain spaces, and every member of
+  an enumerated attribute type has to be a single name token.
+  Modifier flags (auto ... bitwise): c2xml writes "1" when the modifier is
+  set and leaves the attribute out otherwise.
+-->
+<!ATTLIST symbol type (uninitialized|preprocessor|basetype|node|pointer|function|array|struct|union|enum|typedef|typeof|member|bitfield|label|restrict|fouled|keyword|bad|macro) #REQUIRED
+ id ID #REQUIRED
+ file CDATA #REQUIRED
+ start-line CDATA #REQUIRED
+ start-col CDATA #REQUIRED
+ end-line CDATA #IMPLIED
+ end-col CDATA #IMPLIED
+ end-file CDATA #IMPLIED
+
+ ident CDATA #IMPLIED
+ base-type IDREF #IMPLIED
+ base-type-builtin CDATA #IMPLIED
+
+ array-size CDATA #IMPLIED
+
+ bit-size CDATA #IMPLIED
+ alignment CDATA #IMPLIED
+ offset CDATA #IMPLIED
+ bit-offset CDATA #IMPLIED
+
+ auto (0|1) #IMPLIED
+ register (0|1) #IMPLIED
+ static (0|1) #IMPLIED
+ extern (0|1) #IMPLIED
+ const (0|1) #IMPLIED
+ volatile (0|1) #IMPLIED
+ signed (0|1) #IMPLIED
+ unsigned (0|1) #IMPLIED
+ char (0|1) #IMPLIED
+ short (0|1) #IMPLIED
+ long (0|1) #IMPLIED
+ long-long (0|1) #IMPLIED
+ typedef (0|1) #IMPLIED
+ inline (0|1) #IMPLIED
+ addressable (0|1) #IMPLIED
+ nocast (0|1) #IMPLIED
+ noderef (0|1) #IMPLIED
+ accessed (0|1) #IMPLIED
+ toplevel (0|1) #IMPLIED
+ label (0|1) #IMPLIED
+ assigned (0|1) #IMPLIED
+ type-type (0|1) #IMPLIED
+ safe (0|1) #IMPLIED
+ user-type (0|1) #IMPLIED
+ force (0|1) #IMPLIED
+ explicitly-signed (0|1) #IMPLIED
+ bitwise (0|1) #IMPLIED >