Merge 3.2: Issue #13703 plus some related test suite fixes.

Georg Brandl
2012-02-21 00:33:36 +01:00
38 changed files with 706 additions and 174 deletions

View File

@@ -252,11 +252,15 @@ always available.
:const:`verbose` :option:`-v`
:const:`bytes_warning` :option:`-b`
:const:`quiet` :option:`-q`
:const:`hash_randomization` :option:`-R`
============================= =============================
.. versionchanged:: 3.2
Added ``quiet`` attribute for the new :option:`-q` flag.
.. versionadded:: 3.2.3
The ``hash_randomization`` attribute.
.. versionchanged:: 3.3
Removed obsolete ``division_warning`` attribute.
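A minimal sketch (not part of the patch) of how the new attribute surfaces at runtime, assuming a 3.2.3 or later interpreter::

    import sys

    # 1 when hash randomization is enabled (-R or PYTHONHASHSEED=random), else 0
    print(sys.flags.hash_randomization)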

View File

@@ -1277,6 +1277,8 @@ Basic customization
inheritance of :meth:`__hash__` will be blocked, just as if :attr:`__hash__`
had been explicitly set to :const:`None`.
See also the :option:`-R` command-line option.
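An illustrative sketch (not part of the patch): overriding :meth:`__eq__` without defining :meth:`__hash__` leaves instances unhashable, exactly as if ``__hash__`` were set to ``None``::

    class Point:
        def __init__(self, x, y):
            self.x, self.y = x, y

        def __eq__(self, other):
            return (self.x, self.y) == (other.x, other.y)
        # no __hash__ defined, so it is implicitly set to None

    p = Point(1, 2)
    hash(p)   # raises TypeError: unhashable type: 'Point'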
.. method:: object.__bool__(self)

View File

@@ -24,7 +24,7 @@ Command line
When invoking Python, you may specify any of these options::
python [-bBdEhiOsSuvVWx?] [-c command | -m module-name | script | - ] [args]
python [-bBdEhiORqsSuvVWx?] [-c command | -m module-name | script | - ] [args]
The most common use case is, of course, a simple invocation of a script::
@@ -227,6 +227,29 @@ Miscellaneous options
.. versionadded:: 3.2
.. cmdoption:: -R
Turn on hash randomization, so that the :meth:`__hash__` values of str, bytes
and datetime objects are "salted" with an unpredictable random value.
Although they remain constant within an individual Python process, they are
not predictable between repeated invocations of Python.
This is intended to provide protection against a denial of service caused by
carefully chosen inputs that exploit the worst-case performance of dict
insertion, which has O(n^2) complexity. See
http://www.ocert.org/advisories/ocert-2011-003.html for details.
Changing hash values affects the order in which keys are retrieved from a
dict. Although Python has never made guarantees about this ordering (and it
typically varies between 32-bit and 64-bit builds), enough real-world code
implicitly relies on this non-guaranteed behavior that the randomization is
disabled by default.
See also :envvar:`PYTHONHASHSEED`.
.. versionadded:: 3.2.3
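A hedged sketch of the documented behaviour (not part of the patch): two ``-R`` runs of the same interpreter normally produce different hashes for the same string::

    import subprocess, sys

    code = 'print(hash("spam"))'
    runs = [subprocess.check_output([sys.executable, '-R', '-c', code])
            for _ in range(2)]
    # With randomization enabled the two outputs normally differ.
    print(runs[0] != runs[1])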
.. cmdoption:: -s
Don't add the :data:`user site-packages directory <site.USER_SITE>` to
@@ -352,6 +375,7 @@ Options you shouldn't use
.. _Jython: http://jython.org
.. _using-on-envvars:
Environment variables
@@ -460,6 +484,27 @@ These environment variables influence Python's behavior.
option.
.. envvar:: PYTHONHASHSEED
If this variable is set to ``random``, the effect is the same as specifying
the :option:`-R` option: a random value is used to seed the hashes of str,
bytes and datetime objects.
If :envvar:`PYTHONHASHSEED` is set to an integer value, it is used as a fixed
seed for generating the hash() of the types covered by the hash
randomization.
Its purpose is to allow repeatable hashing, such as for self-tests of the
interpreter itself, or to allow a cluster of Python processes to share hash
values.
The integer must be a decimal number in the range [0,4294967295]. Specifying
the value 0 will lead to the same hash values as when hash randomization is
disabled.
.. versionadded:: 3.2.3
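A small hedged example (not part of the patch): a fixed integer seed makes hashes repeatable across runs, while ``random`` does not::

    import os, subprocess, sys

    def spam_hash(seed):
        env = dict(os.environ, PYTHONHASHSEED=str(seed))
        return subprocess.check_output(
            [sys.executable, '-c', 'print(hash("spam"))'], env=env)

    print(spam_hash(42) == spam_hash(42))              # True: fixed seed
    print(spam_hash('random') == spam_hash('random'))  # normally False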
.. envvar:: PYTHONIOENCODING
If this is set before running the interpreter, it overrides the encoding used

View File

@@ -554,6 +554,12 @@ PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*);
PyAPI_FUNC(Py_hash_t) _Py_HashBytes(unsigned char*, Py_ssize_t);
#endif
typedef struct {
Py_hash_t prefix;
Py_hash_t suffix;
} _Py_HashSecret_t;
PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret;
/* Helper for passing objects to printf and the like */
#define PyObject_REPR(obj) _PyUnicode_AsString(PyObject_Repr(obj))

View File

@@ -19,6 +19,7 @@ PyAPI_DATA(int) Py_IgnoreEnvironmentFlag;
PyAPI_DATA(int) Py_DontWriteBytecodeFlag;
PyAPI_DATA(int) Py_NoUserSiteDirectory;
PyAPI_DATA(int) Py_UnbufferedStdioFlag;
PyAPI_DATA(int) Py_HashRandomizationFlag;
/* this is a wrapper around getenv() that pays attention to
Py_IgnoreEnvironmentFlag. It should be used for getting variables like

View File

@@ -246,6 +246,8 @@ typedef void (*PyOS_sighandler_t)(int);
PyAPI_FUNC(PyOS_sighandler_t) PyOS_getsig(int);
PyAPI_FUNC(PyOS_sighandler_t) PyOS_setsig(int, PyOS_sighandler_t);
/* Random */
PyAPI_FUNC(int) _PyOS_URandom (void *buffer, Py_ssize_t size);
#ifdef __cplusplus
}

View File

@@ -31,7 +31,9 @@ Encoding basic Python object hierarchies::
Compact encoding::
>>> import json
>>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',', ':'))
>>> from collections import OrderedDict
>>> mydict = OrderedDict([('4', 5), ('6', 7)])
>>> json.dumps([1,2,3,mydict], separators=(',', ':'))
'[1,2,3,{"4":5,"6":7}]'
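A side note (an assumption, not part of the patch): passing ``sort_keys=True`` is another way to keep this doctest output stable regardless of hash-dependent dict ordering::

    >>> import json
    >>> json.dumps([1, 2, 3, {'4': 5, '6': 7}], sort_keys=True, separators=(',', ':'))
    '[1,2,3,{"4":5,"6":7}]'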
Pretty printing::

View File

@@ -852,23 +852,6 @@ try:
except NameError: # statvfs_result may not exist
pass
if not _exists("urandom"):
def urandom(n):
"""urandom(n) -> str
Return a string of n random bytes suitable for cryptographic use.
"""
try:
_urandomfd = open("/dev/urandom", O_RDONLY)
except (OSError, IOError):
raise NotImplementedError("/dev/urandom (or equivalent) not found")
bs = b""
while len(bs) < n:
bs += read(_urandomfd, n - len(bs))
close(_urandomfd)
return bs
# Supply os.popen()
def popen(cmd, mode="r", buffering=-1):
if not isinstance(cmd, str):

View File

@@ -1786,8 +1786,6 @@ class TestDateTime(TestDate):
self.assertTrue(abs(from_timestamp - from_now) <= tolerance)
def test_strptime(self):
import _strptime
string = '2004-12-01 13:02:47.197'
format = '%Y-%m-%d %H:%M:%S.%f'
expected = _strptime._strptime_datetime(self.theclass, string, format)

View File

@@ -14,7 +14,7 @@ class BasicTestMappingProtocol(unittest.TestCase):
def _reference(self):
"""Return a dictionary of values which are invariant by storage
in the object under test."""
return {1:2, "key1":"value1", "key2":(1,2,3)}
return {"1": "2", "key1":"value1", "key2":(1,2,3)}
def _empty_mapping(self):
"""Return an empty mapping object"""
return self.type2test()

View File

@@ -3,7 +3,6 @@
import sys
import os
import re
import os.path
import tempfile
import subprocess
@@ -20,11 +19,15 @@ def _assert_python(expected_success, *args, **env_vars):
cmd_line = [sys.executable]
if not env_vars:
cmd_line.append('-E')
cmd_line.extend(args)
# Need to preserve the original environment, for in-place testing of
# shared library builds.
env = os.environ.copy()
# But a special flag that can be set to override -- in this case, the
# caller is responsible to pass the full environment.
if env_vars.pop('__cleanenv', None):
env = {}
env.update(env_vars)
cmd_line.extend(args)
p = subprocess.Popen(cmd_line, stdin=subprocess.PIPE,
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
env=env)
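A hedged usage sketch (illustrative only; the helper and the ``__cleanenv`` keyword are the ones shown in the hunk above): a caller that needs a fully controlled environment passes ``__cleanenv`` plus exactly the variables it cares about::

    from test.script_helper import assert_python_ok

    # With __cleanenv set, only the variables given here reach the child process.
    rc, out, err = assert_python_ok(
        '-c', 'print(hash("spam"))',
        __cleanenv=True, PYTHONHASHSEED='42')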

View File

@@ -1588,6 +1588,7 @@ def args_from_interpreter_flags():
flag_opt_map = {
'bytes_warning': 'b',
'dont_write_bytecode': 'B',
'hash_randomization': 'R',
'ignore_environment': 'E',
'no_user_site': 's',
'no_site': 'S',

View File

@@ -324,6 +324,22 @@ class CmdLineTest(unittest.TestCase):
def test_no_std_streams(self):
self._test_no_stdio(['stdin', 'stdout', 'stderr'])
def test_hash_randomization(self):
# Verify that -R enables hash randomization:
self.verify_valid_flag('-R')
hashes = []
for i in range(2):
code = 'print(hash("spam"))'
rc, out, err = assert_python_ok('-R', '-c', code)
self.assertEqual(rc, 0)
hashes.append(out)
self.assertNotEqual(hashes[0], hashes[1])
# Verify that sys.flags contains hash_randomization
code = 'import sys; print("random is", sys.flags.hash_randomization)'
rc, out, err = assert_python_ok('-R', '-c', code)
self.assertEqual(rc, 0)
self.assertIn(b'random is 1', out)
def test_main():
test.support.run_unittest(CmdLineTest)

View File

@@ -1,7 +1,9 @@
import unittest
import sys
from test.support import import_fresh_module, run_unittest
TESTS = 'test.datetimetester'
# XXX: import_fresh_module() is supposed to leave the sys.modules cache untouched,
# XXX: but it does not, so we have to save and restore it ourselves.
save_sys_modules = sys.modules.copy()
@@ -15,28 +17,32 @@ finally:
sys.modules.update(save_sys_modules)
test_modules = [pure_tests, fast_tests]
test_suffixes = ["_Pure", "_Fast"]
# XXX(gb) First run all the _Pure tests, then all the _Fast tests. You might
# not believe this, but in spite of all the sys.modules trickery running a _Pure
# test last will leave a mix of pure and native datetime stuff lying around.
test_classes = []
for module, suffix in zip(test_modules, test_suffixes):
for name, cls in module.__dict__.items():
if isinstance(cls, type) and issubclass(cls, unittest.TestCase):
name += suffix
cls.__name__ = name
globals()[name] = cls
def setUp(self, module=module, setup=cls.setUp):
self._save_sys_modules = sys.modules.copy()
sys.modules[TESTS] = module
sys.modules['datetime'] = module.datetime_module
sys.modules['_strptime'] = module._strptime
setup(self)
def tearDown(self, teardown=cls.tearDown):
teardown(self)
sys.modules.clear()
sys.modules.update(self._save_sys_modules)
cls.setUp = setUp
cls.tearDown = tearDown
if not (isinstance(cls, type) and issubclass(cls, unittest.TestCase)):
continue
cls.__name__ = name + suffix
@classmethod
def setUpClass(cls_, module=module):
cls_._save_sys_modules = sys.modules.copy()
sys.modules[TESTS] = module
sys.modules['datetime'] = module.datetime_module
sys.modules['_strptime'] = module._strptime
@classmethod
def tearDownClass(cls_):
sys.modules.clear()
sys.modules.update(cls_._save_sys_modules)
cls.setUpClass = setUpClass
cls.tearDownClass = tearDownClass
test_classes.append(cls)
def test_main():
run_unittest(__name__)
run_unittest(*test_classes)
if __name__ == "__main__":
test_main()

View File

@@ -53,7 +53,7 @@ class TestGdbm(unittest.TestCase):
all = set(gdbm.open_flags)
# Test standard flags (presumably "crwn").
modes = all - set('fsu')
for mode in modes:
for mode in sorted(modes): # put "c" mode first
self.g = gdbm.open(filename, mode)
self.g.close()

View File

@@ -350,12 +350,13 @@ Variable names:
6: args
7: kwds
Cell variables:
0: e
1: d
2: f
3: y
4: x
5: z"""
0: [edfxyz]
1: [edfxyz]
2: [edfxyz]
3: [edfxyz]
4: [edfxyz]
5: [edfxyz]"""
# NOTE: the order of the cell variables above depends on dictionary order!
co_tricky_nested_f = tricky.__func__.__code__.co_consts[1]
@@ -374,12 +375,12 @@ Names:
Variable names:
0: c
Free variables:
0: e
1: d
2: f
3: y
4: x
5: z"""
0: [edfxyz]
1: [edfxyz]
2: [edfxyz]
3: [edfxyz]
4: [edfxyz]
5: [edfxyz]"""
code_info_expr_str = """\
Name: <module>

View File

@@ -52,13 +52,18 @@ class DebuggerTests(unittest.TestCase):
"""Test that the debugger can debug Python."""
def run_gdb(self, *args):
def run_gdb(self, *args, **env_vars):
"""Runs gdb with the command line given by *args.
Returns its stdout, stderr
"""
if env_vars:
env = os.environ.copy()
env.update(env_vars)
else:
env = None
out, err = subprocess.Popen(
args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env,
).communicate()
return out.decode('utf-8', 'replace'), err.decode('utf-8', 'replace')
@@ -118,7 +123,7 @@ class DebuggerTests(unittest.TestCase):
# print ' '.join(args)
# Use "args" to invoke gdb, capturing stdout, stderr:
out, err = self.run_gdb(*args)
out, err = self.run_gdb(*args, PYTHONHASHSEED='0')
# Ignore some noise on stderr due to the pending breakpoint:
err = err.replace('Function "%s" not defined.\n' % breakpoint, '')
@@ -207,7 +212,8 @@ class PrettyPrintTests(DebuggerTests):
'Verify the pretty-printing of dictionaries'
self.assertGdbRepr({})
self.assertGdbRepr({'foo': 'bar'})
self.assertGdbRepr({'foo': 'bar', 'douglas':42})
self.assertGdbRepr({'foo': 'bar', 'douglas': 42},
"{'foo': 'bar', 'douglas': 42}")
def test_lists(self):
'Verify the pretty-printing of lists'
@@ -269,8 +275,8 @@ class PrettyPrintTests(DebuggerTests):
def test_sets(self):
'Verify the pretty-printing of sets'
self.assertGdbRepr(set())
self.assertGdbRepr(set(['a', 'b']))
self.assertGdbRepr(set([4, 5, 6]))
self.assertGdbRepr(set(['a', 'b']), "{'a', 'b'}")
self.assertGdbRepr(set([4, 5, 6]), "{4, 5, 6}")
# Ensure that we handle sets containing the "dummy" key value,
# which happens on deletion:
@@ -282,8 +288,8 @@ id(s)''')
def test_frozensets(self):
'Verify the pretty-printing of frozensets'
self.assertGdbRepr(frozenset())
self.assertGdbRepr(frozenset(['a', 'b']))
self.assertGdbRepr(frozenset([4, 5, 6]))
self.assertGdbRepr(frozenset(['a', 'b']), "frozenset({'a', 'b'})")
self.assertGdbRepr(frozenset([4, 5, 6]), "frozenset({4, 5, 6})")
def test_exceptions(self):
# Test a RuntimeError

View File

@@ -3,10 +3,16 @@
#
# Also test that hash implementations are inherited as expected
import datetime
import os
import sys
import unittest
from test import support
from test.script_helper import assert_python_ok
from collections import Hashable
IS_64BIT = sys.maxsize > 2**32
class HashEqualityTestCase(unittest.TestCase):
@@ -117,10 +123,92 @@ class HashBuiltinsTestCase(unittest.TestCase):
for obj in self.hashes_to_check:
self.assertEqual(hash(obj), _default_hash(obj))
class HashRandomizationTests(unittest.TestCase):
# Each subclass should define a field "repr_", containing the repr() of
# an object to be tested
def get_hash_command(self, repr_):
return 'print(hash(%s))' % repr_
def get_hash(self, repr_, seed=None):
env = os.environ.copy()
env['__cleanenv'] = True # signal to assert_python not to do a copy
# of os.environ on its own
if seed is not None:
env['PYTHONHASHSEED'] = str(seed)
else:
env.pop('PYTHONHASHSEED', None)
out = assert_python_ok(
'-c', self.get_hash_command(repr_),
**env)
stdout = out[1].strip()
return int(stdout)
def test_randomized_hash(self):
# two runs should return different hashes
run1 = self.get_hash(self.repr_, seed='random')
run2 = self.get_hash(self.repr_, seed='random')
self.assertNotEqual(run1, run2)
class StringlikeHashRandomizationTests(HashRandomizationTests):
def test_null_hash(self):
# PYTHONHASHSEED=0 disables the randomized hash
if IS_64BIT:
known_hash_of_obj = 1453079729188098211
else:
known_hash_of_obj = -1600925533
# Randomization is disabled by default:
self.assertEqual(self.get_hash(self.repr_), known_hash_of_obj)
# It can also be disabled by setting the seed to 0:
self.assertEqual(self.get_hash(self.repr_, seed=0), known_hash_of_obj)
def test_fixed_hash(self):
# test a fixed seed for the randomized hash
# Note that all types share the same values:
if IS_64BIT:
h = -4410911502303878509
else:
h = -206076799
self.assertEqual(self.get_hash(self.repr_, seed=42), h)
class StrHashRandomizationTests(StringlikeHashRandomizationTests):
repr_ = repr('abc')
def test_empty_string(self):
self.assertEqual(hash(""), 0)
class BytesHashRandomizationTests(StringlikeHashRandomizationTests):
repr_ = repr(b'abc')
def test_empty_string(self):
self.assertEqual(hash(b""), 0)
class DatetimeTests(HashRandomizationTests):
def get_hash_command(self, repr_):
return 'import datetime; print(hash(%s))' % repr_
class DatetimeDateTests(DatetimeTests):
repr_ = repr(datetime.date(1066, 10, 14))
class DatetimeDatetimeTests(DatetimeTests):
repr_ = repr(datetime.datetime(1, 2, 3, 4, 5, 6, 7))
class DatetimeTimeTests(DatetimeTests):
repr_ = repr(datetime.time(0))
def test_main():
support.run_unittest(HashEqualityTestCase,
HashInheritanceTestCase,
HashBuiltinsTestCase)
HashInheritanceTestCase,
HashBuiltinsTestCase,
StrHashRandomizationTests,
BytesHashRandomizationTests,
DatetimeDateTests,
DatetimeDatetimeTests,
DatetimeTimeTests)
if __name__ == "__main__":

View File

@@ -26,6 +26,7 @@ try:
import threading
except ImportError:
threading = None
from test.script_helper import assert_python_ok
os.stat_float_times(True)
st = os.stat(__file__)
@@ -794,14 +795,33 @@ class DevNullTests(unittest.TestCase):
self.assertEqual(f.read(), b'')
class URandomTests(unittest.TestCase):
def test_urandom(self):
try:
self.assertEqual(len(os.urandom(1)), 1)
self.assertEqual(len(os.urandom(10)), 10)
self.assertEqual(len(os.urandom(100)), 100)
self.assertEqual(len(os.urandom(1000)), 1000)
except NotImplementedError:
pass
def test_urandom_length(self):
self.assertEqual(len(os.urandom(0)), 0)
self.assertEqual(len(os.urandom(1)), 1)
self.assertEqual(len(os.urandom(10)), 10)
self.assertEqual(len(os.urandom(100)), 100)
self.assertEqual(len(os.urandom(1000)), 1000)
def test_urandom_value(self):
data1 = os.urandom(16)
data2 = os.urandom(16)
self.assertNotEqual(data1, data2)
def get_urandom_subprocess(self, count):
code = '\n'.join((
'import os, sys',
'data = os.urandom(%s)' % count,
'sys.stdout.buffer.write(data)',
'sys.stdout.buffer.flush()'))
out = assert_python_ok('-c', code)
stdout = out[1]
self.assertEqual(len(stdout), count)
return stdout
def test_urandom_subprocess(self):
data1 = self.get_urandom_subprocess(16)
data2 = self.get_urandom_subprocess(16)
self.assertNotEqual(data1, data2)
@contextlib.contextmanager
def _execvpe_mockup(defpath=None):

View File

@@ -733,6 +733,17 @@ class TestBasicOps(unittest.TestCase):
if self.repr is not None:
self.assertEqual(repr(self.set), self.repr)
def check_repr_against_values(self):
text = repr(self.set)
self.assertTrue(text.startswith('{'))
self.assertTrue(text.endswith('}'))
result = text[1:-1].split(', ')
result.sort()
sorted_repr_values = [repr(value) for value in self.values]
sorted_repr_values.sort()
self.assertEqual(result, sorted_repr_values)
def test_print(self):
try:
fo = open(support.TESTFN, "w")
@@ -891,7 +902,9 @@ class TestBasicOpsString(TestBasicOps):
self.set = set(self.values)
self.dup = set(self.values)
self.length = 3
self.repr = "{'a', 'c', 'b'}"
def test_repr(self):
self.check_repr_against_values()
#------------------------------------------------------------------------------
@@ -902,7 +915,9 @@ class TestBasicOpsBytes(TestBasicOps):
self.set = set(self.values)
self.dup = set(self.values)
self.length = 3
self.repr = "{b'a', b'c', b'b'}"
def test_repr(self):
self.check_repr_against_values()
#------------------------------------------------------------------------------
@@ -916,11 +931,13 @@ class TestBasicOpsMixedStringBytes(TestBasicOps):
self.set = set(self.values)
self.dup = set(self.values)
self.length = 4
self.repr = "{'a', b'a', 'b', b'b'}"
def tearDown(self):
self._warning_filters.__exit__(None, None, None)
def test_repr(self):
self.check_repr_against_values()
#==============================================================================
def baditer():

Some files were not shown because too many files have changed in this diff.