Bug 972939 - Add documentation for mozbase structured logging. r=wlach

2024-09-13 09:24:08 -07:00 · 2014-02-25 17:32:07 +00:00 · 2014-02-25 17:32:07 +00:00 · 09de4092cb
commit 09de4092cb
parent 34a1f3a40c
9 changed files with 361 additions and 23 deletions
--- a/testing/mozbase/docs/loggingreporting.rst
+++ b/testing/mozbase/docs/loggingreporting.rst
@ -9,4 +9,4 @@ less verbose. We created some libraries to make doing this easy.
   :maxdepth: 2

   mozlog
-
+   mozlog_structured
--- a/testing/mozbase/docs/mozlog_structured.rst
+++ b/testing/mozbase/docs/mozlog_structured.rst
@ -0,0 +1,270 @@
+:mod:`mozlog.structured` --- Structured logging for test output
+===============================================================
+
+``mozlog.structured`` is a library designed for logging the execution
+and results of test harnesses. The canonical output format is JSON,
+with one line of JSON per log entry. It is *not* based on the stdlib
+logging module, although it shares several concepts with this module.
+
+One notable difference between this module and the standard logging
+module is the way that loggers are created. The structured logging
+module does not require that loggers with a specific name are
+singleton objects accessed through a factory function. Instead the
+``StructuredLogger`` constructor may be used directly. However all
+loggers with the same name share the same internal state (the "Borg"
+pattern). In particular the list of handlers functions is the same for
+all loggers with the same name.
+
+Logging is threadsafe, with access to handlers protected by a
+``threading.Lock``. However it is `not` process-safe. This means that
+applications using multiple processes, e.g. via the
+``multiprocessing`` module, should arrange for all logging to happen in
+a single process.
+
+Data Format
+-----------
+
+Structured loggers produce messages in a simple format designed to be
+compatible with the JSON data model. Each message is a single object,
+with the type of message indicated by the ``action`` key. It is
+intended that the set of ``action`` values be closed; where there are
+use cases for additional values they should be integrated into this
+module rather than extended in an ad-hoc way. The set of keys present
+on on all messages is:
+
+``action``
+  The type of the message (string).
+
+``time``
+  The timestamp of the message in ms since the epoch (int).
+
+``thread``
+  The name of the thread emitting the message (string).
+
+``pid``
+  The pid of the process creating the message (int).
+
+``source``
+  Name of the logger creating the message (string).
+
+For each ``action`` there are is a further set of specific fields
+describing the details of the event that caused the message to be
+emitted:
+
+``suite_start``
+  Emitted when the testsuite starts running.
+
+  ``tests``
+    A list of test_ids (list).
+
+``suite_end``
+  Emitted when the testsuite is finished and no more results will be produced.
+
+``test_start``
+  Emitted when a test is being started.
+
+  ``test``
+    A unique id for the test (string or list of strings).
+
+``test_status``
+  Emitted for a test which has subtests to record the result of a
+  single subtest
+
+  ``test``
+    The same unique id for the test as in the ``test_start`` message.
+
+  ``subtest``
+    Name of the subtest (string).
+
+  ``status``
+    Result of the test (string enum; ``PASS``, ``FAIL``, ``TIMEOUT``,
+    ``NOTRUN``)
+
+  ``expected``
+    Expected result of the test. Omitted if the expected result is the
+    same as the actual result (string enum, same as ``status``).
+
+``test_end``
+  Emitted to give the result of a test with no subtests, or the status
+  of the overall file when there are subtests.
+
+  ``test``
+    The same unique id for the test as in the ``test_start`` message.
+
+  ``status``
+    Either result of the test (if there are no subtests) in which case
+    (string enum ``PASS``, ``FAIL``, ``TIMEOUT``, ``CRASH``,
+    ``ASSERT``, ``SKIP``) or the status of the overall file where
+    there are subtests (string enum ``OK``, ``ERROR``, ``TIMEOUT``,
+    ``CRASH``, ``ASSERT``, ``SKIP``).
+
+  ``expected``
+    The expected status, or emitted if the expected status matches the
+    actual status (string enum, same as ``status``).
+
+``process_output``
+  Output from a managed subprocess.
+
+  ``process``
+  pid of the subprocess.
+
+  ``command``
+  Command used to launch the subprocess.
+
+  ``data``
+  Data output by the subprocess.
+
+``log``
+  General human-readable logging message, used to debug the harnesses
+  themselves rather than to provide input to other tools.
+
+  ``level``
+    Level of the log message (string enum ``CRITICAL``, ``ERROR``,
+    ``WARNING``, ``INFO``, ``DEBUG``).
+
+  ``message``
+    Text of the log message.
+
+StructuredLogger Objects
+------------------------
+
+.. automodule:: mozlog.structured.structuredlog
+
+.. autoclass:: StructuredLogger
+   :members: add_handler, remove_handler, handlers, suite_start,
+             suite_end, test_start, test_status, test_end,
+             process_output, critical, error, warning, info, debug
+
+.. autoclass:: StructuredLogFileLike
+  :members:
+
+Handlers
+--------
+
+A handler is a callable that is called for each log message produced
+and is responsible for handling the processing of that
+message. The typical example of this is a ``StreamHandler`` which takes
+a log message, invokes a formatter which converts the log to a string,
+and writes it to a file.
+
+.. automodule:: mozlog.structured.handlers
+
+.. autoclass:: StreamHandler
+  :members:
+
+.. autoclass:: LogLevelFilter
+  :members:
+
+Formatters
+----------
+
+Formatters are callables that take a log message, and return either a
+string representation of that message, or ``None`` if that message
+should not appear in the output. This allows formatters to both
+exclude certain items and create internal buffers of the output so
+that, for example, a single string might be returned for a
+``test_end`` message indicating the overall result of the test,
+including data provided in the ``test_status`` messages.
+
+Formatter modules are written so that they can take raw input on stdin
+and write formatted output on stdout. This allows the formatters to be
+invoked as part of a command line for post-processing raw log files.
+
+.. automodule:: mozlog.structured.formatters.base
+
+.. autoclass:: BaseFormatter
+  :members:
+
+.. automodule:: mozlog.structured.formatters.unittest
+
+.. autoclass:: UnittestFormatter
+  :members:
+
+.. automodule:: mozlog.structured.formatters.xunit
+
+.. autoclass:: XUnitFormatter
+  :members:
+
+.. automodule:: mozlog.structured.formatters.html
+
+.. autoclass:: HTMLFormatter
+  :members:
+
+.. automodule:: mozlog.structured.formatters.machformatter
+
+.. autoclass:: MachFormatter
+  :members:
+
+.. autoclass:: MachTerminalFormatter
+  :members:
+
+Processing Log Files
+--------------------
+
+The ``mozlog.structured.reader`` module provides utilities for working
+with structured log files.
+
+.. automodule:: mozlog.structured.reader
+  :members:
+
+Integration with argparse
+-------------------------
+
+The `mozlog.structured.commandline` module provides integration with
+the `argparse` module to provide uniform logging-related command line
+arguments to programs using `mozlog.structured`. Each known formatter
+gets a command line argument of the form ``--log-{name}``, which takes
+the name of a file to log to with that format of `-` to indicate stdout.
+
+.. automodule:: mozlog.structured.commandline
+  :members:
+
+Examples
+--------
+
+Log to stdout::
+
+    from mozlog.structured import structuredlog
+    from mozlog.structured import handlers, formatters
+    logger = structuredlog.StructuredLogger("my-test-suite")
+    logger.add_handler(handlers.StreamHandler(sys.stdout,
+                                              formatters.JSONFormatter()))
+    logger.suite_start(["test-id-1"])
+    logger.test_start("test-id-1")
+    logger.info("This is a message with action='LOG' and level='INFO'")
+    logger.test_status("test-id-1", "subtest-1", "PASS")
+    logger.test_end("test-id-1", "OK")
+    logger.suite_end()
+
+
+Populate an ``argparse.ArgumentParser`` with logging options, and
+create a logger based on the value of those options, defaulting to
+JSON output on stdout if nothing else is supplied::
+
+   import argparse
+   from mozlog.structured import commandline
+
+   parser = argparse.ArgumentParser()
+   # Here one would populate the parser with other options
+   commandline.add_logging_group(parser)
+
+   args = parser.parse_args()
+   logger = commandline.setup_logging("testsuite-name", args, {"raw": sys.stdout})
+
+Count the number of tests that timed out in a testsuite::
+
+   from mozlog.structured import reader
+
+   class TestCounter(object):
+       def count(filename):
+           self.count = 0
+           with open(filename) as f:
+               reader.map_action(reader.read(f),
+                                 {"test_end":self.process_test_end})
+          return self.count
+
+       def process_test_end(self, data):
+           if data["status"] == "TIMEOUT":
+               self.count += 1
+
+   print TestCounter().count("my_test_run.log")
--- a/testing/mozbase/mozlog/mozlog/structured/formatters/base.py
+++ b/testing/mozbase/mozlog/mozlog/structured/formatters/base.py
@ -6,6 +6,21 @@ import json


 class BaseFormatter(object):
+    """Base class for implementing non-trivial formatters.
+
+    Subclasses are expected to provide a method for each action type they
+    wish to handle, each taking a single argument for the test data.
+    For example a trivial subclass that just produces the id of each test as
+    it starts might be::
+
+      class StartIdFormatter(BaseFormatter);
+          def test_start(data):
+              #For simplicity in the example pretend the id is always a string
+              return data["test"]
+    """
+    def __init__(self):
+        pass
+
    def __call__(self, data):
        if hasattr(self, data["action"]):
            handler = getattr(self, data["action"])
--- a/testing/mozbase/mozlog/mozlog/structured/formatters/machformatter.py
+++ b/testing/mozbase/mozlog/mozlog/structured/formatters/machformatter.py
@ -93,6 +93,16 @@ class BaseMachFormatter(base.BaseFormatter):


 class MachFormatter(BaseMachFormatter):
+    """Formatter designed for producing human-redable output
+    when tests are running. This is principally intended for integration with
+    the ``mach`` command dispatch framework.
+
+    :param start_time: time.time() at which the testrun started
+    :param write_interval: bool indicating whether to include the interval since the
+                           last message
+    :param write_times: bool indicating whether to include the time since the testrun
+                        started.
+    """
    def __call__(self, data):
        s = BaseMachFormatter.__call__(self, data)
        if s is not None:
@ -100,6 +110,17 @@ class MachFormatter(BaseMachFormatter):


 class MachTerminalFormatter(BaseMachFormatter):
+    """Formatter designed to produce coloured output in a terminal when tests are
+    running. This is principally intended for integration with
+    the ``mach`` command dispatch framework.
+
+    :param start_time: time.time() at which the testrun started
+    :param write_interval: bool indicating whether to include the interval since the
+                           last message
+    :param write_times: bool indicating whether to include the time since the testrun
+                        started.
+    :param terminal: Terminal object from mach.
+    """
    def __init__(self, start_time=None, write_interval=False, write_times=True,
                 terminal=None):
        self.terminal = terminal
--- a/testing/mozbase/mozlog/mozlog/structured/formatters/unittest.py
+++ b/testing/mozbase/mozlog/mozlog/structured/formatters/unittest.py
@ -10,6 +10,8 @@ from .. import handlers


 class UnittestFormatter(base.BaseFormatter):
+    """Formatter designed to produce output in a format like that used by
+    the ``unittest`` module in the standard library."""
    def __init__(self):
        self.fails = []
        self.errors = []
--- a/testing/mozbase/mozlog/mozlog/structured/formatters/xunit.py
+++ b/testing/mozbase/mozlog/mozlog/structured/formatters/xunit.py
@ -16,9 +16,18 @@ def format_test_id(test_id):


 class XUnitFormatter(base.BaseFormatter):
-    """The data model here isn't a great match. This implementation creates
-    one <testcase> element for each subtest and one more, with no @name
-    for each test"""
+    """Formatter that produces XUnit-style XML output.
+
+    The tree is created in-memory so this formatter may be problematic
+    with very large log files.
+
+    Note that the data model isn't a perfect match. In
+    particular XUnit assumes that each test has a unittest-style
+    class name and function name, which isn't the case for us. The
+    implementation currently replaces path names with something that
+    looks like class names, but this doesn't work for test types that
+    actually produce class names, or for test types that have multiple
+    components in their test id (e.g. reftests)."""

    def __init__(self):
        self.tree = ElementTree.ElementTree()
--- a/testing/mozbase/mozlog/mozlog/structured/handlers/init.py
+++ b/testing/mozbase/mozlog/mozlog/structured/handlers/init.py
@ -21,6 +21,12 @@ class BaseHandler(object):


 class LogLevelFilter(object):
+    """Handler that filters out messages with action:log and a level
+    lower than some specified level.
+
+    :param inner: Handler to use for messages that pass this filter
+    :param level: Minimum log level to process
+    """
    def __init__(self, inner, level):
        self.inner = inner
        self.level = log_levels[level.upper()]
@ -32,6 +38,12 @@ class LogLevelFilter(object):


 class StreamHandler(BaseHandler):
+    """Handler for writing to a file-like object
+
+    :param stream: File-like object to write log messages to
+    :param formatter: formatter to convert messages to string format
+    """
+
    _lock = Lock()

    def __init__(self,  stream, formatter):
@ -40,6 +52,9 @@ class StreamHandler(BaseHandler):
        BaseHandler.__init__(self, formatter)

    def __call__(self, data):
+        """Write a log message.
+
+        :param data: Structured log message dictionary."""
        formatted = self.formatter(data)
        if not formatted:
            return
--- a/testing/mozbase/mozlog/mozlog/structured/reader.py
+++ b/testing/mozbase/mozlog/mozlog/structured/reader.py
@ -4,9 +4,11 @@
 import json

 def read(log_f, raise_on_error=False):
-    """Return a generator that will return the entries in a structured log file
+    """Return a generator that will return the entries in a structured log file.
+    Note that the caller must not close the file whilst the generator is still
+    in use.

-    :param log_f: file-like object containing the log enteries, one per line
+    :param log_f: file-like object containing the raw log entries, one per line
    :param raise_on_error: boolean indicating whether ValueError should be raised
                           for lines that cannot be decoded."""
    for line in log_f:
--- a/testing/mozbase/mozlog/mozlog/structured/structuredlog.py
+++ b/testing/mozbase/mozlog/mozlog/structured/structuredlog.py
@ -62,19 +62,20 @@ log_levels = dict((k.upper(), v) for v, k in
 class StructuredLogger(object):
    _lock = Lock()
    _handlers = defaultdict(list)
+    """Create a structured logger with the given name
+
+    :param name: The name of the logger.
+    """

    def __init__(self, name):
-        """
-        Create a structured logger with the given name
-
-        :param name: The name of the logger.
-        """
        self.name = name

    def add_handler(self, handler):
+        """Add a handler to the current logger"""
        self._handlers[self.name].append(handler)

    def remove_handler(self, handler):
+        """Remove a handler from the current logger"""
        for i, candidate_handler in enumerate(self._handlers[self.name][:]):
            if candidate_handler == handler:
                del self._handlers[self.name][i]
@ -82,7 +83,7 @@ class StructuredLogger(object):

    @property
    def handlers(self):
-        """Get a list of handlers that will be called when a
+        """A list of handlers that will be called when a
        message is logged from this logger"""
        return self._handlers[self.name]

@ -104,8 +105,7 @@ class StructuredLogger(object):
        return all_data

    def suite_start(self, tests):
-        """
-        Log a suite_start message
+        """Log a suite_start message

        :param tests: List of test identifiers that will be run in the suite.
        """
@ -116,8 +116,7 @@ class StructuredLogger(object):
        self._log_data("suite_end")

    def test_start(self, test):
-        """
-        "Log a test_start message
+        """Log a test_start message

        :param test: Identifier of the test that will run.
        """
@ -188,12 +187,14 @@ class StructuredLogger(object):


 def _log_func(level_name):
-    def log(self, message, params=None):
-        if params is None:
-            params = {}
+    def log(self, message):
        data = {"level": level_name, "message": message}
-        data.update(params)
        self._log_data("log", data)
+    log.__doc__ = """Log a message with level %s
+
+:param message: The string message to log
+""" % level_name
+    log.__name__ = str(level_name).lower()
    return log


@ -203,13 +204,16 @@ for level_name in log_levels:


 class StructuredLogFileLike(object):
-    """
-    Wrapper for file like objects to redirect output to logger
-    instead.
+    """Wrapper for file-like objects to redirect writes to logger
+    instead. Each call to `write` becomes a single log entry of type `log`.

    When using this it is important that the callees i.e. the logging
    handlers do not themselves try to write to the wrapped file as this
    will cause infinite recursion.
+
+    :param logger: `StructuredLogger` to which to redirect the file write operations.
+    :param level: log level to use for each write.
+    :param prefix: String prefix to prepend to each log entry.
    """
    def __init__(self, logger, level="info", prefix=None):
        self.logger = logger