import collections
import csv
import json
import os
import urllib.request
# Every path below is anchored on the absolute directory containing this
# script, so the script behaves the same regardless of the working directory.
HERE = os.path.dirname(os.path.abspath(__file__))

# Upstream CSV listing the ECS fields (taken from the `main` branch).
CSV_URL = (
    "https://raw.githubusercontent.com/elastic/ecs/main"
    "/generated/csv/fields.csv"
)

# Local copy of the CSV and the generated format file, both next to the script.
CSV_PATH, OUT_PATH = (
    os.path.join(HERE, filename)
    for filename in ("ecs_fields.csv", "ecs_log.json")
)
def fetch_csv(url=None, path=None):
    """Return the path of the local ECS fields CSV, downloading it if absent.

    An existing file at *path* is reused as-is and no network access happens.
    Otherwise the CSV is downloaded to a temporary ``.part`` file beside the
    destination and moved into place only after the transfer has finished, so
    an interrupted download can never be mistaken for a valid cached copy.

    Args:
        url: Source URL of the CSV. Defaults to ``CSV_URL``.
        path: Destination / cache path. Defaults to ``CSV_PATH``.

    Returns:
        *path* (or ``CSV_PATH`` when *path* was not given).

    Raises:
        OSError: If the download or the final rename fails
            (``urllib.error.URLError`` is an ``OSError`` subclass).
    """
    if path is None:
        path = CSV_PATH
    if os.path.exists(path):
        return path

    # Resolve the default lazily: a cache hit never needs the URL.
    if url is None:
        url = CSV_URL

    partial = os.fspath(path) + ".part"
    try:
        urllib.request.urlretrieve(url, partial)
        # Same-directory rename, so the cache file appears all at once.
        os.replace(partial, path)
    finally:
        # Only present when the download or rename failed part-way.
        if os.path.exists(partial):
            os.remove(partial)
    return path
# Field types grouped by the value kind they are mapped to.
# NOTE(review): `unsigned_long` can exceed a signed 64-bit range; it is still
# treated as an integer because that is what the values are - confirm the
# consumer copes with very large values.
_INTEGER_TYPES = frozenset({"long", "integer", "short", "byte", "unsigned_long"})
_FLOAT_TYPES = frozenset({"float", "double", "scaled_float", "half_float"})
_JSON_TYPES = frozenset({"object", "flattened", "nested", "geo_point"})


def kind_for(ecs_type):
    """Map an ECS field type name to the ``kind`` used in the format's values.

    Args:
        ecs_type: Type name from the CSV ``Type`` column. ``None`` and
            surrounding whitespace are tolerated.

    Returns:
        ``"integer"``, ``"float"``, ``"boolean"`` or ``"json"`` for the
        recognised numeric, boolean and structured types; ``"string"`` for
        everything else (keyword, text, match_only_text, wildcard,
        constant_keyword, ip, date, version, unknown or empty types).
    """
    t = (ecs_type or "").strip()
    if t in _INTEGER_TYPES:
        return "integer"
    if t in _FLOAT_TYPES:
        return "float"
    if t == "boolean":
        return "boolean"
    if t in _JSON_TYPES:
        return "json"
    return "string"
def build_values(csv_path):
    """Build the format's ``value`` table from the ECS fields CSV.

    Args:
        csv_path: Path to a CSV file with a header row that contains at least
            the ``Field`` and ``Type`` columns.

    Returns:
        An ``OrderedDict`` in first-seen CSV order. Keys are the field names
        with ``.`` replaced by ``/``; each entry is ``{"kind": <kind>}``, with
        ``"identifier": True`` added for string fields whose last dotted
        segment is ``id`` or ``name``. Repeated fields keep their first
        definition; rows without a field name are skipped.

    Raises:
        KeyError: If the header has no ``Field`` or ``Type`` column.
        OSError: If the file cannot be opened.
    """
    values = collections.OrderedDict()
    # Decode explicitly as UTF-8 instead of the platform's locale encoding so
    # the result does not depend on the machine running the script (the
    # upstream file is expected to be UTF-8).
    with open(csv_path, newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            # Short rows yield None for missing cells; treat that as empty.
            name = (row["Field"] or "").strip()
            if not name:
                continue
            key = name.replace(".", "/")
            if key in values:
                continue
            kind = kind_for(row["Type"])
            entry = {"kind": kind}
            leaf = name.rsplit(".", 1)[-1]
            if kind == "string" and leaf in ("id", "name"):
                entry["identifier"] = True
            values[key] = entry
    return values
def build_format(values):
    """Assemble the complete ``ecs_log`` format document.

    Args:
        values: Mapping of field path to value definition; it is stored
            unchanged (same object) under the ``"value"`` key.

    Returns:
        A dict with the ``$schema`` reference and the ``ecs_log`` definition,
        ready to be serialised as JSON.
    """
    separator = " "

    # Columns of the rendered line: timestamp, logger, [level], message.
    timestamp_column = {
        "field": "__timestamp__",
        "auto-width": True,
        "align": "left",
        "timestamp-format": "%y-%m-%d %H:%M:%S.%L",
    }
    logger_column = {
        "field": "log/logger",
        "auto-width": True,
        "align": "left",
    }
    level_column = {
        "field": "log/level",
        "align": "left",
        "min-width": 4,
        "max-width": 4,
        "overflow": "truncate",
        "prefix": "[",
        "suffix": "]",
    }
    message_column = {"field": "message"}

    ecs_log = {
        "title": "ecs",
        "description": "Elastic Common Schema (ECS) defines a common set of fields for ingesting data into Elasticsearch including log records",
        "url": "https://www.elastic.co/guide/en/ecs-logging/overview/current/intro.html",
        "file-type": "json",
        "line-format": [
            timestamp_column,
            separator,
            logger_column,
            separator,
            level_column,
            separator,
            message_column,
        ],
        "value": values,
        "level-field": "log/level",
        "thread-id-field": "process/thread/id",
        "timestamp-field": "@timestamp",
        "convert-to-local-time": True,
        "hide-extra": True,
    }

    return {
        "$schema": "https://lnav.org/schemas/format-v1.schema.json",
        "ecs_log": ecs_log,
    }
def main():
    """Generate the format file and print a short summary.

    Obtains the CSV path from ``fetch_csv()``, turns it into the value table
    and the full format document, writes that document as 4-space-indented
    JSON (plus a trailing newline) to ``OUT_PATH``, then prints the number of
    fields and the output path.
    """
    field_table = build_values(fetch_csv())
    document = build_format(field_table)

    with open(OUT_PATH, "w") as out:
        out.write(json.dumps(document, indent=4) + "\n")

    print(f"fields: {len(field_table)}")
    print(f"written: {OUT_PATH}")


# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
The current ECS format in lnav defines only 16 fields, whereas ECS 9.5.0-dev currently defines 2732 fields.
Which set of these fields would be preferred for inclusion in the lnav format?
This Python script, generated by Claude, creates an lnav format definition from the official CSV file that lists all ECS fields.