gecko/toolkit/components/telemetry/histogram_tools.py

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import json
import math
import re

from collections import OrderedDict

def table_dispatch(kind, table, body):
    """Look up kind in table and hand the matching entry to body.

    Arguments:
      kind  -- the histogram kind used as the lookup key.
      table -- a mapping from histogram kinds to arbitrary values.
      body  -- a one-argument callable; it is invoked with table[kind].

    Returns whatever body returns.  Raises BaseException when kind is not
    a key of table.
    """
    if kind not in table:
        # Use the call form of raise: the "raise Class, message" statement
        # is a syntax error on Python 3, while this form also works on
        # Python 2.
        raise BaseException("don't know how to handle a histogram of kind %s" % kind)
    return body(table[kind])

class DefinitionException(BaseException):
    """Error raised when a histogram's numeric limits are not integers."""

def check_numeric_limits(dmin, dmax, n_buckets):
    """Verify that the bucket parameters of a histogram are plain integers.

    Arguments:
      dmin      -- the minimum value of the histogram.
      dmax      -- the maximum value of the histogram.
      n_buckets -- the number of buckets of the histogram.

    Returns None.  Raises DefinitionException describing the first
    parameter (checked in the order above) whose type is not int.
    """
    checks = (
        (dmin, "minimum is not a number"),
        (dmax, "maximum is not a number"),
        (n_buckets, "number of buckets is not a number"),
    )
    for value, complaint in checks:
        # The exact type comparison is deliberate: it keeps rejecting bool
        # (a subclass of int) and every non-int value, as before.
        if type(value) is not int:
            # Call form of raise; "raise Class, message" does not parse on
            # Python 3.
            raise DefinitionException(complaint)

def linear_buckets(dmin, dmax, n_buckets):
    """Return the lower bounds of n_buckets linearly spaced buckets.

    The first entry is always 0.  The remaining entries run from dmin
    (index 1) to dmax (last index) in equal steps; each one has 0.5 added
    and is then truncated to an int.  The arguments are validated with
    check_numeric_limits first.
    """
    check_numeric_limits(dmin, dmax, n_buckets)
    lowest = float(dmin)
    highest = float(dmax)

    def lower_bound(index):
        # Weighted mix of the two limits: all dmin at index 1, all dmax at
        # index n_buckets - 1.
        blended = (lowest * (n_buckets - 1 - index) + highest * (index - 1)) / (n_buckets - 2)
        return int(blended + 0.5)

    bounds = [0] * n_buckets
    bounds[1:] = [lower_bound(index) for index in range(1, n_buckets)]
    return bounds

def exponential_buckets(dmin, dmax, n_buckets):
    """Return the lower bounds of n_buckets exponentially spaced buckets.

    The first entry is always 0 and the second is dmin.  Every later bound
    is found by splitting the remaining logarithmic distance to dmax
    evenly over the buckets still to fill, rounding to the nearest
    integer, and forcing the result to exceed the previous bound by at
    least one.  The arguments are validated with check_numeric_limits
    first.
    """
    check_numeric_limits(dmin, dmax, n_buckets)
    upper_log = math.log(dmax)
    bounds = [0] * n_buckets
    bounds[1] = dmin
    for slot in range(2, n_buckets):
        previous = bounds[slot - 1]
        previous_log = math.log(previous)
        step = (upper_log - previous_log) / (n_buckets - slot)
        candidate = int(math.floor(math.exp(previous_log + step) + 0.5))
        # Bounds must be strictly increasing, so never advance by less
        # than one.
        bounds[slot] = max(candidate, previous + 1)
    return bounds

# Definition keys that are accepted for every kind of histogram.
always_allowed_keys = [
    'kind',
    'description',
    'cpp_guard',
    'expires_in_version',
    'alert_emails',
]

class Histogram:
    """A class for representing a histogram definition."""

    def __init__(self, name, definition):
        """Initialize a histogram named name with the given definition.

        definition is a dict-like object that must contain at least the
        keys:

         - 'kind': The kind of histogram.  Must be one of 'boolean', 'flag',
           'enumerated', 'linear', or 'exponential'.
         - 'description': A textual description of the histogram.

        The key 'cpp_guard' is optional; if present, it denotes a
        preprocessor symbol that should guard C/C++ definitions associated
        with the histogram.

        The definition is validated first (see verify_attributes), which
        may rewrite definition['expires_in_version'] in place.  Raises
        KeyError if the definition carries a key that is not permitted for
        its kind or lacks a required one.
        """
        self.verify_attributes(name, definition)
        self._name = name
        self._description = definition['description']
        self._kind = definition['kind']
        self._cpp_guard = definition.get('cpp_guard')
        self._extended_statistics_ok = definition.get('extended_statistics_ok', False)
        self._expiration = definition.get('expires_in_version')
        self.compute_bucket_parameters(definition)
        # 'enumerated' histograms are reported as linear ones.
        table = { 'boolean': 'BOOLEAN',
                  'flag': 'FLAG',
                  'enumerated': 'LINEAR',
                  'linear': 'LINEAR',
                  'exponential': 'EXPONENTIAL' }
        table_dispatch(self.kind(), table, self._set_nsITelemetry_kind)

    def name(self):
        """Return the name of the histogram."""
        return self._name

    def description(self):
        """Return the description of the histogram."""
        return self._description

    def kind(self):
        """Return the kind of the histogram.

        Will be one of 'boolean', 'flag', 'enumerated', 'linear', or
        'exponential'.
        """
        return self._kind

    def expiration(self):
        """Return the expiration version of the histogram."""
        return self._expiration

    def nsITelemetry_kind(self):
        """Return the nsITelemetry constant corresponding to the kind of
        the histogram."""
        return self._nsITelemetry_kind

    def _set_nsITelemetry_kind(self, kind):
        """Store the nsITelemetry constant built from the suffix kind."""
        self._nsITelemetry_kind = "nsITelemetry::HISTOGRAM_%s" % kind

    def low(self):
        """Return the lower bound of the histogram.  May be a string."""
        return self._low

    def high(self):
        """Return the high bound of the histogram.  May be a string."""
        return self._high

    def n_buckets(self):
        """Return the number of buckets in the histogram.  May be a string."""
        return self._n_buckets

    def cpp_guard(self):
        """Return the preprocessor symbol that should guard C/C++
        definitions associated with the histogram.  Returns None if no
        guarding is necessary."""
        return self._cpp_guard

    def extended_statistics_ok(self):
        """Return True if gathering extended statistics for this histogram
        is enabled."""
        return self._extended_statistics_ok

    def ranges(self):
        """Return an array of lower bounds for each bucket in the histogram."""
        table = { 'boolean': linear_buckets,
                  'flag': linear_buckets,
                  'enumerated': linear_buckets,
                  'linear': linear_buckets,
                  'exponential': exponential_buckets }
        return table_dispatch(self.kind(), table,
                              lambda p: p(self.low(), self.high(), self.n_buckets()))

    def compute_bucket_parameters(self, definition):
        """Derive low, high and n_buckets from definition and store them.

        The helper matching the histogram's kind produces the triple, which
        is then handed to set_bucket_parameters.
        """
        table = {
            'boolean': Histogram.boolean_flag_bucket_parameters,
            'flag': Histogram.boolean_flag_bucket_parameters,
            'enumerated': Histogram.enumerated_bucket_parameters,
            'linear': Histogram.linear_bucket_parameters,
            'exponential': Histogram.exponential_bucket_parameters
            }
        table_dispatch(self.kind(), table,
                       lambda p: self.set_bucket_parameters(*p(definition)))

    def verify_attributes(self, name, definition):
        """Check that definition only uses keys permitted for its kind.

        Also normalizes definition['expires_in_version'] in place (see
        check_expiration).  Raises KeyError for a key that is not permitted,
        or if definition has no 'kind' key.
        """
        # always_allowed_keys is only read here, so no global statement is
        # needed.
        general_keys = always_allowed_keys + ['low', 'high', 'n_buckets']

        table = {
            'boolean': always_allowed_keys,
            'flag': always_allowed_keys,
            'enumerated': always_allowed_keys + ['n_values'],
            'linear': general_keys,
            'exponential': general_keys + ['extended_statistics_ok']
            }
        table_dispatch(definition['kind'], table,
                       lambda allowed_keys: Histogram.check_keys(name, definition, allowed_keys))

        Histogram.check_expiration(name, definition)

    @staticmethod
    def check_expiration(name, definition):
        """Normalize definition['expires_in_version'] in place.

        A bare major version such as '30' becomes '30.0a1' and 'N.0'
        becomes 'N.0a1'; any other value is stored back unchanged.  A
        missing or empty value leaves definition untouched.  name is
        accepted for symmetry with check_keys but is not used.
        """
        expiration = definition.get('expires_in_version')

        if not expiration:
            return

        if re.match(r'^[1-9][0-9]*$', expiration):
            expiration = expiration + ".0a1"
        elif re.match(r'^[1-9][0-9]*\.0$', expiration):
            expiration = expiration + "a1"

        definition['expires_in_version'] = expiration

    @staticmethod
    def check_keys(name, definition, allowed_keys):
        """Raise KeyError if definition has a key outside allowed_keys.

        name is the histogram name and is only used in the error message.
        """
        # Iterate the mapping directly: iterkeys() does not exist on
        # Python 3, and "raise Class, message" does not parse there either.
        for key in definition:
            if key not in allowed_keys:
                raise KeyError('%s not permitted for %s' % (key, name))

    def set_bucket_parameters(self, low, high, n_buckets):
        """Store low, high and n_buckets, evaluating them where possible.

        Each value is passed through eval so that string expressions such
        as "5+1" become numbers; a value that cannot be evaluated (already
        a number, or a string that is not a valid expression) is kept
        as-is.
        """
        def try_to_coerce_to_number(v):
            # NOTE(review): eval executes arbitrary expressions, so the
            # definitions fed to this class must come from a trusted file.
            try:
                return eval(v, {})
            except Exception:
                # Only ordinary errors mean "not evaluable"; a bare except
                # would also swallow KeyboardInterrupt and SystemExit.
                return v
        self._low = try_to_coerce_to_number(low)
        self._high = try_to_coerce_to_number(high)
        self._n_buckets = try_to_coerce_to_number(n_buckets)

    @staticmethod
    def boolean_flag_bucket_parameters(definition):
        """Return the fixed (low, high, n_buckets) triple used for
        'boolean' and 'flag' histograms; definition is not consulted."""
        return (1, 2, 3)

    @staticmethod
    def linear_bucket_parameters(definition):
        """Return (low, high, n_buckets) for a 'linear' histogram.

        'low' defaults to 1; 'high' and 'n_buckets' are required and a
        KeyError is raised when either is missing.
        """
        return (definition.get('low', 1),
                definition['high'],
                definition['n_buckets'])

    @staticmethod
    def enumerated_bucket_parameters(definition):
        """Return (low, high, n_buckets) for an 'enumerated' histogram.

        low is 1, high is definition['n_values'] and n_buckets is the
        string "<n_values>+1", which set_bucket_parameters evaluates later
        when it can.  Raises KeyError when 'n_values' is missing.
        """
        n_values = definition['n_values']
        return (1, n_values, "%s+1" % n_values)

    @staticmethod
    def exponential_bucket_parameters(definition):
        """Return (low, high, n_buckets) for an 'exponential' histogram.

        'low' defaults to 1; 'high' and 'n_buckets' are required and a
        KeyError is raised when either is missing.
        """
        return (definition.get('low', 1),
                definition['high'],
                definition['n_buckets'])

def from_file(filename):
    """Return an iterator that provides a sequence of Histograms for
    the histograms defined in filename.

    filename must name a JSON file whose top-level object maps histogram
    names to their definitions.  Histograms are produced in the order in
    which they appear in the file.
    """
    # json.load consumes the whole file, so the handle can be closed before
    # any histogram is yielded instead of staying open for as long as the
    # generator is alive.
    with open(filename, 'r') as f:
        histograms = json.load(f, object_pairs_hook=OrderedDict)
    # items() exists on both Python 2 and Python 3; iteritems() is
    # Python 2 only.
    for name, definition in histograms.items():
        yield Histogram(name, definition)
Bug 781531 - generate histogram information from JSON; r=taras 2012-08-24 12:54:55 -07:00			`# This Source Code Form is subject to the terms of the Mozilla Public`
			`# License, v. 2.0. If a copy of the MPL was not distributed with this`
			`# file, You can obtain one at http://mozilla.org/MPL/2.0/.`

Bug 922190 - Remove bundled copy of simplejson. r=gps --HG-- extra : rebase_source : 5d33bafacd732e6bfb7acb6bb4d171eabb7d258a 2013-10-01 07:32:58 -07:00			`import json`
Bug 748417 - double-check Python-generated range-information; r=taras 2012-08-27 13:47:32 -07:00			`import math`
Bug 742500 - Disable expired telemetry probes. r=vladan 2014-01-03 09:02:48 -08:00			`import re`
Bug 800557 - Build shouldn't depend on simplejson. r=froydnj, r=ted 2012-10-11 14:19:40 -07:00
Bug 922190 - Remove bundled copy of simplejson. r=gps --HG-- extra : rebase_source : 5d33bafacd732e6bfb7acb6bb4d171eabb7d258a 2013-10-01 07:32:58 -07:00			`from collections import OrderedDict`
Bug 781531 - generate histogram information from JSON; r=taras 2012-08-24 12:54:55 -07:00
Bug 748417 - provide a proper Histogram class; r=taras 2012-08-23 13:32:34 -07:00			`def table_dispatch(kind, table, body):`
			`"""Call body with table[kind] if it exists. Raise an error otherwise."""`
			`if kind in table:`
Bug 748417 - double-check Python-generated range-information; r=taras 2012-08-27 13:47:32 -07:00			`return body(table[kind])`
Bug 748417 - provide a proper Histogram class; r=taras 2012-08-23 13:32:34 -07:00			`else:`
			`raise BaseException, "don't know how to handle a histogram of kind %s" % kind`

Bug 748417 - double-check Python-generated range-information; r=taras 2012-08-27 13:47:32 -07:00			`class DefinitionException(BaseException):`
			`pass`

			`def check_numeric_limits(dmin, dmax, n_buckets):`
			`if type(dmin) != int:`
			`raise DefinitionException, "minimum is not a number"`
			`if type(dmax) != int:`
			`raise DefinitionException, "maximum is not a number"`
			`if type(n_buckets) != int:`
			`raise DefinitionException, "number of buckets is not a number"`

			`def linear_buckets(dmin, dmax, n_buckets):`
			`check_numeric_limits(dmin, dmax, n_buckets)`
			`ret_array = [0] * n_buckets`
			`dmin = float(dmin)`
			`dmax = float(dmax)`
			`for i in range(1, n_buckets):`
			`linear_range = (dmin * (n_buckets - 1 - i) + dmax * (i - 1)) / (n_buckets - 2)`
			`ret_array[i] = int(linear_range + 0.5)`
			`return ret_array`

			`def exponential_buckets(dmin, dmax, n_buckets):`
			`check_numeric_limits(dmin, dmax, n_buckets)`
			`log_max = math.log(dmax);`
			`bucket_index = 2;`
			`ret_array = [0] * n_buckets`
			`current = dmin`
			`ret_array[1] = current`
			`for bucket_index in range(2, n_buckets):`
			`log_current = math.log(current)`
			`log_ratio = (log_max - log_current) / (n_buckets - bucket_index)`
			`log_next = log_current + log_ratio`
			`next_value = int(math.floor(math.exp(log_next) + 0.5))`
			`if next_value > current:`
			`current = next_value`
			`else:`
			`current = current + 1`
			`ret_array[bucket_index] = current`
			`return ret_array`

Bug 1037494 - Add author field to Histograms.json; r=vladan 2014-07-28 00:53:00 -07:00			`always_allowed_keys = ['kind', 'description', 'cpp_guard', 'expires_in_version', "alert_emails"]`
Bug 748417 - provide a proper Histogram class; r=taras 2012-08-23 13:32:34 -07:00
			`class Histogram:`
			`"""A class for representing a histogram definition."""`

			`def __init__(self, name, definition):`
			`"""Initialize a histogram named name with the given definition.`
			`definition is a dict-like object that must contain at least the keys:`

			`- 'kind': The kind of histogram. Must be one of 'boolean', 'flag',`
			`'enumerated', 'linear', or 'exponential'.`
			`- 'description': A textual description of the histogram.`

			`The key 'cpp_guard' is optional; if present, it denotes a preprocessor`
			`symbol that should guard C/C++ definitions associated with the histogram."""`
			`self.verify_attributes(name, definition)`
			`self._name = name`
			`self._description = definition['description']`
			`self._kind = definition['kind']`
			`self._cpp_guard = definition.get('cpp_guard')`
Bug 837271 - part 2 - add extended_statistics_ok mechanism to histogram machinery; r=vladan 2013-02-13 07:51:24 -08:00			`self._extended_statistics_ok = definition.get('extended_statistics_ok', False)`
Bug 742500 - Disable expired telemetry probes. r=vladan 2014-01-03 09:02:48 -08:00			`self._expiration = definition.get('expires_in_version')`
Bug 748417 - provide a proper Histogram class; r=taras 2012-08-23 13:32:34 -07:00			`self.compute_bucket_parameters(definition)`
			`table = { 'boolean': 'BOOLEAN',`
			`'flag': 'FLAG',`
			`'enumerated': 'LINEAR',`
			`'linear': 'LINEAR',`
			`'exponential': 'EXPONENTIAL' }`
			`table_dispatch(self.kind(), table,`
			`lambda k: self._set_nsITelemetry_kind(k))`

			`def name(self):`
			`"""Return the name of the histogram."""`
			`return self._name`

			`def description(self):`
			`"""Return the description of the histogram."""`
			`return self._description`

			`def kind(self):`
			`"""Return the kind of the histogram.`
			`Will be one of 'boolean', 'flag', 'enumerated', 'linear', or 'exponential'."""`
			`return self._kind`

Bug 742500 - Disable expired telemetry probes. r=vladan 2014-01-03 09:02:48 -08:00			`def expiration(self):`
			`"""Return the expiration version of the histogram."""`
			`return self._expiration`

Bug 748417 - provide a proper Histogram class; r=taras 2012-08-23 13:32:34 -07:00			`def nsITelemetry_kind(self):`
			`"""Return the nsITelemetry constant corresponding to the kind of`
			`the histogram."""`
			`return self._nsITelemetry_kind`

			`def _set_nsITelemetry_kind(self, kind):`
			`self._nsITelemetry_kind = "nsITelemetry::HISTOGRAM_%s" % kind`

			`def low(self):`
			`"""Return the lower bound of the histogram. May be a string."""`
			`return self._low`

			`def high(self):`
			`"""Return the high bound of the histogram. May be a string."""`
			`return self._high`

			`def n_buckets(self):`
			`"""Return the number of buckets in the histogram. May be a string."""`
			`return self._n_buckets`

			`def cpp_guard(self):`
			`"""Return the preprocessor symbol that should guard C/C++ definitions`
			`associated with the histogram. Returns None if no guarding is necessary."""`
			`return self._cpp_guard`

Bug 837271 - part 2 - add extended_statistics_ok mechanism to histogram machinery; r=vladan 2013-02-13 07:51:24 -08:00			`def extended_statistics_ok(self):`
			`"""Return True if gathering extended statistics for this histogram`
			`is enabled."""`
			`return self._extended_statistics_ok`

Bug 748417 - double-check Python-generated range-information; r=taras 2012-08-27 13:47:32 -07:00			`def ranges(self):`
			`"""Return an array of lower bounds for each bucket in the histogram."""`
			`table = { 'boolean': linear_buckets,`
			`'flag': linear_buckets,`
			`'enumerated': linear_buckets,`
			`'linear': linear_buckets,`
			`'exponential': exponential_buckets }`
			`return table_dispatch(self.kind(), table,`
			`lambda p: p(self.low(), self.high(), self.n_buckets()))`

Bug 748417 - provide a proper Histogram class; r=taras 2012-08-23 13:32:34 -07:00			`def compute_bucket_parameters(self, definition):`
			`table = {`
			`'boolean': Histogram.boolean_flag_bucket_parameters,`
			`'flag': Histogram.boolean_flag_bucket_parameters,`
			`'enumerated': Histogram.enumerated_bucket_parameters,`
			`'linear': Histogram.linear_bucket_parameters,`
			`'exponential': Histogram.exponential_bucket_parameters`
			`}`
			`table_dispatch(self.kind(), table,`
			`lambda p: self.set_bucket_parameters(*p(definition)))`

			`def verify_attributes(self, name, definition):`
			`global always_allowed_keys`
			`general_keys = always_allowed_keys + ['low', 'high', 'n_buckets']`

			`table = {`
			`'boolean': always_allowed_keys,`
			`'flag': always_allowed_keys,`
			`'enumerated': always_allowed_keys + ['n_values'],`
			`'linear': general_keys,`
Bug 837271 - part 2 - add extended_statistics_ok mechanism to histogram machinery; r=vladan 2013-02-13 07:51:24 -08:00			`'exponential': general_keys + ['extended_statistics_ok']`
Bug 748417 - provide a proper Histogram class; r=taras 2012-08-23 13:32:34 -07:00			`}`
			`table_dispatch(definition['kind'], table,`
			`lambda allowed_keys: Histogram.check_keys(name, definition, allowed_keys))`

Bug 742500 - Disable expired telemetry probes. r=vladan 2014-01-03 09:02:48 -08:00			`Histogram.check_expiration(name, definition)`

			`@staticmethod`
			`def check_expiration(name, definition):`
Bug 956715 - Allow missing "expires_in_version" in histogram definition. r=froydnj 2014-01-09 10:20:12 -08:00			`expiration = definition.get('expires_in_version')`
Bug 742500 - Disable expired telemetry probes. r=vladan 2014-01-03 09:02:48 -08:00
			`if not expiration:`
			`return`

Bug 957156 - Expire telemetry histograms by default also in development channels. r=vladan 2014-01-09 10:20:41 -08:00			`if re.match(r'^[1-9][0-9]*$', expiration):`
			`expiration = expiration + ".0a1"`
			`elif re.match(r'^[1-9][0-9]*\.0$', expiration):`
			`expiration = expiration + "a1"`

			`definition['expires_in_version'] = expiration`
Bug 742500 - Disable expired telemetry probes. r=vladan 2014-01-03 09:02:48 -08:00
Bug 748417 - provide a proper Histogram class; r=taras 2012-08-23 13:32:34 -07:00			`@staticmethod`
			`def check_keys(name, definition, allowed_keys):`
			`for key in definition.iterkeys():`
			`if key not in allowed_keys:`
			`raise KeyError, '%s not permitted for %s' % (key, name)`

			`def set_bucket_parameters(self, low, high, n_buckets):`
Bug 748417 - double-check Python-generated range-information; r=taras 2012-08-27 13:47:32 -07:00			`def try_to_coerce_to_number(v):`
			`try:`
			`return eval(v, {})`
			`except:`
			`return v`
			`self._low = try_to_coerce_to_number(low)`
			`self._high = try_to_coerce_to_number(high)`
			`self._n_buckets = try_to_coerce_to_number(n_buckets)`
Bug 748417 - provide a proper Histogram class; r=taras 2012-08-23 13:32:34 -07:00
			`@staticmethod`
			`def boolean_flag_bucket_parameters(definition):`
Bug 789371 - fix min/max/n_buckets for boolean and flag histograms; r=taras 2012-09-11 01:33:16 -07:00			`return (1, 2, 3)`
Bug 748417 - provide a proper Histogram class; r=taras 2012-08-23 13:32:34 -07:00
			`@staticmethod`
			`def linear_bucket_parameters(definition):`
			`return (definition.get('low', 1),`
			`definition['high'],`
			`definition['n_buckets'])`

			`@staticmethod`
			`def enumerated_bucket_parameters(definition):`
			`n_values = definition['n_values']`
			`return (1, n_values, "%s+1" % n_values)`

			`@staticmethod`
			`def exponential_bucket_parameters(definition):`
			`return (definition.get('low', 1),`
			`definition['high'],`
			`definition['n_buckets'])`

Bug 781531 - generate histogram information from JSON; r=taras 2012-08-24 12:54:55 -07:00			`def from_file(filename):`
Bug 748417 - provide a proper Histogram class; r=taras 2012-08-23 13:32:34 -07:00			`"""Return an iterator that provides a sequence of Histograms for`
			`the histograms defined in filename.`
Bug 781531 - generate histogram information from JSON; r=taras 2012-08-24 12:54:55 -07:00			`"""`
			`with open(filename, 'r') as f:`
Bug 800557 - Build shouldn't depend on simplejson. r=froydnj, r=ted 2012-10-11 14:19:40 -07:00			`histograms = json.load(f, object_pairs_hook=OrderedDict)`
Bug 748417 - provide a proper Histogram class; r=taras 2012-08-23 13:32:34 -07:00			`for (name, definition) in histograms.iteritems():`
			`yield Histogram(name, definition)`