Skip to content

Updating ECS format? #1711

Description

@piotr-dobrogost

The current ECS format in lnav defines only 16 fields, whereas ECS 9.5.0-dev currently defines 2732 fields.
Which subset of these fields should the lnav format include?

A Claude-generated Python script that creates an lnav format definition from the official CSV file listing all ECS fields:
import collections
import csv
import json
import os
import urllib.request

# Absolute path of the directory containing this script; the cached CSV and
# the generated format file are both placed here.
HERE = os.path.dirname(os.path.abspath(__file__))

# Location the ECS field list is downloaded from.
CSV_URL = "https://raw.githubusercontent.com/elastic/ecs/main/generated/csv/fields.csv"
# Local copy of the downloaded CSV (reused on later runs if it already exists).
CSV_PATH = os.path.join(HERE, "ecs_fields.csv")
# Destination of the generated lnav format definition.
OUT_PATH = os.path.join(HERE, "ecs_log.json")


def fetch_csv():
    """Ensure the ECS field CSV exists locally and return its path.

    If ``CSV_PATH`` already exists it is reused as-is. Otherwise the file is
    downloaded from ``CSV_URL`` into a temporary ``.part`` sibling and only
    moved to ``CSV_PATH`` once the download has finished, so an interrupted
    download never leaves a truncated file that later runs would mistake for
    a valid cache.

    Returns:
        ``CSV_PATH``.

    Raises:
        Any exception raised by the download or the rename propagates to the
        caller; the partial file is removed first.
    """
    if os.path.exists(CSV_PATH):
        return CSV_PATH

    partial_path = CSV_PATH + ".part"
    try:
        urllib.request.urlretrieve(CSV_URL, partial_path)
        # os.replace swaps the finished file into place in one step.
        os.replace(partial_path, CSV_PATH)
    finally:
        # Only still present when the download or the rename failed.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    return CSV_PATH


def kind_for(ecs_type):
    """Translate an ECS field type into the ``kind`` used in the format file.

    Args:
        ecs_type: Type name taken from the CSV; may be ``None`` or empty.
            Surrounding whitespace is ignored; matching is case-sensitive.

    Returns:
        ``"integer"``, ``"float"``, ``"boolean"`` or ``"json"`` for the
        recognised types, and ``"string"`` for everything else (keyword,
        text, match_only_text, wildcard, constant_keyword, ip, date,
        version, ...).
    """
    kind_by_type = {
        "long": "integer",
        "integer": "integer",
        "float": "float",
        "double": "float",
        "scaled_float": "float",
        "half_float": "float",
        "boolean": "boolean",
        "object": "json",
        "flattened": "json",
        "nested": "json",
        "geo_point": "json",
    }
    normalized = ecs_type.strip() if ecs_type else ""
    # Anything not listed above is treated as plain text.
    return kind_by_type.get(normalized, "string")


def build_values(csv_path):
    """Build the ``value`` mapping of the format from the ECS field CSV.

    Each CSV row contributes one entry keyed by the field name with ``.``
    replaced by ``/`` (e.g. ``log.level`` -> ``log/level``). The entry holds
    the ``kind`` derived from the row's ``Type`` column via ``kind_for``.
    String fields whose last dotted component is ``id`` or ``name`` are
    additionally flagged with ``"identifier": True``.

    Args:
        csv_path: Path of a CSV file with at least ``Field`` and ``Type``
            columns. The file is read as UTF-8 (assumed encoding of the
            upstream file -- TODO confirm).

    Returns:
        An ``OrderedDict`` mapping slash-separated field paths to their
        entry dicts, in first-seen CSV order. Repeated field names keep the
        first occurrence; rows without a field name are skipped.

    Raises:
        KeyError: If the CSV header lacks a ``Field`` or ``Type`` column.
    """
    values = collections.OrderedDict()
    # Decode explicitly as UTF-8 rather than with the platform default, which
    # can mis-decode or fail on non-ASCII text on non-UTF-8 locales.
    with open(csv_path, newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            # DictReader fills columns missing from a short row with None.
            name = (row["Field"] or "").strip()
            if not name:
                # A nameless row would otherwise create a bogus "" entry.
                continue
            key = name.replace(".", "/")
            if key in values:
                continue
            kind = kind_for(row["Type"])
            entry = {"kind": kind}
            leaf = name.rsplit(".", 1)[-1]
            if leaf in ("id", "name") and kind == "string":
                entry["identifier"] = True
            values[key] = entry
    return values


def build_format(values):
    """Assemble the complete format document around a ``value`` mapping.

    Args:
        values: Mapping of field paths to entry dicts; stored unchanged
            (same object) under the ``"value"`` key.

    Returns:
        A dict with the ``"$schema"`` URL and a single ``"ecs_log"`` format
        definition, ready to be serialised as JSON.
    """
    separator = " "

    # Columns of the rendered log line, left to right.
    timestamp_column = {
        "field": "__timestamp__",
        "auto-width": True,
        "align": "left",
        "timestamp-format": "%y-%m-%d %H:%M:%S.%L",
    }
    logger_column = {
        "field": "log/logger",
        "auto-width": True,
        "align": "left",
    }
    level_column = {
        "field": "log/level",
        "align": "left",
        "min-width": 4,
        "max-width": 4,
        "overflow": "truncate",
        "prefix": "[",
        "suffix": "]",
    }
    message_column = {"field": "message"}

    line_format = [
        timestamp_column,
        separator,
        logger_column,
        separator,
        level_column,
        separator,
        message_column,
    ]

    definition = {
        "title": "ecs",
        "description": "Elastic Common Schema (ECS) defines a common set of fields for ingesting data into Elasticsearch including log records",
        "url": "https://www.elastic.co/guide/en/ecs-logging/overview/current/intro.html",
        "file-type": "json",
        "line-format": line_format,
        "value": values,
        "level-field": "log/level",
        "thread-id-field": "process/thread/id",
        "timestamp-field": "@timestamp",
        "convert-to-local-time": True,
        "hide-extra": True,
    }

    return {
        "$schema": "https://lnav.org/schemas/format-v1.schema.json",
        "ecs_log": definition,
    }


def main():
    """Generate the format file and report what was written.

    Obtains the field CSV, converts it into the ``value`` mapping, wraps it
    in the full format document and writes that to ``OUT_PATH`` as JSON
    indented by four spaces with a trailing newline. Finally prints the
    number of fields and the output path.
    """
    values = build_values(fetch_csv())
    document = build_format(values)

    with open(OUT_PATH, "w") as out:
        json.dump(document, out, indent=4)
        out.write("\n")

    print(f"fields: {len(values)}", f"written: {OUT_PATH}", sep="\n")


# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()

Metadata

Metadata

Assignees

No one assigned

    Labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions