mirror of
https://github.com/AdaCore/cpython.git
synced 2026-02-12 12:57:15 -08:00
bpo-28685: Optimize sorted() list.sort() with type-specialized comparisons (#582)
This commit is contained in:
@@ -260,6 +260,120 @@ class TestDecorateSortUndecorate(unittest.TestCase):
|
||||
self.assertEqual(data, copy2)
|
||||
|
||||
#==============================================================================
|
||||
def check_against_PyObject_RichCompareBool(self, L):
|
||||
## The idea here is to exploit the fact that unsafe_tuple_compare uses
|
||||
## PyObject_RichCompareBool for the second elements of tuples. So we have,
|
||||
## for (most) L, sorted(L) == [y[1] for y in sorted([(0,x) for x in L])]
|
||||
## This will work as long as __eq__ => not __lt__ for all the objects in L,
|
||||
## which holds for all the types used below.
|
||||
##
|
||||
## Testing this way ensures that the optimized implementation remains consistent
|
||||
## with the naive implementation, even if changes are made to any of the
|
||||
## richcompares.
|
||||
##
|
||||
## This function tests sorting for three lists (it randomly shuffles each one):
|
||||
## 1. L
|
||||
## 2. [(x,) for x in L]
|
||||
## 3. [((x,),) for x in L]
|
||||
|
||||
random.seed(0)
|
||||
random.shuffle(L)
|
||||
L_1 = L[:]
|
||||
L_2 = [(x,) for x in L]
|
||||
L_3 = [((x,),) for x in L]
|
||||
for L in [L_1, L_2, L_3]:
|
||||
optimized = sorted(L)
|
||||
reference = [y[1] for y in sorted([(0,x) for x in L])]
|
||||
for (opt, ref) in zip(optimized, reference):
|
||||
self.assertIs(opt, ref)
|
||||
#note: not assertEqual! We want to ensure *identical* behavior.
|
||||
|
||||
class TestOptimizedCompares(unittest.TestCase):
|
||||
def test_safe_object_compare(self):
|
||||
heterogeneous_lists = [[0, 'foo'],
|
||||
[0.0, 'foo'],
|
||||
[('foo',), 'foo']]
|
||||
for L in heterogeneous_lists:
|
||||
self.assertRaises(TypeError, L.sort)
|
||||
self.assertRaises(TypeError, [(x,) for x in L].sort)
|
||||
self.assertRaises(TypeError, [((x,),) for x in L].sort)
|
||||
|
||||
float_int_lists = [[1,1.1],
|
||||
[1<<70,1.1],
|
||||
[1.1,1],
|
||||
[1.1,1<<70]]
|
||||
for L in float_int_lists:
|
||||
check_against_PyObject_RichCompareBool(self, L)
|
||||
|
||||
def test_unsafe_object_compare(self):
|
||||
|
||||
# This test is by ppperry. It ensures that unsafe_object_compare is
|
||||
# verifying ms->key_richcompare == tp->richcompare before comparing.
|
||||
|
||||
class WackyComparator(int):
|
||||
def __lt__(self, other):
|
||||
elem.__class__ = WackyList2
|
||||
return int.__lt__(self, other)
|
||||
|
||||
class WackyList1(list):
|
||||
pass
|
||||
|
||||
class WackyList2(list):
|
||||
def __lt__(self, other):
|
||||
raise ValueError
|
||||
|
||||
L = [WackyList1([WackyComparator(i), i]) for i in range(10)]
|
||||
elem = L[-1]
|
||||
with self.assertRaises(ValueError):
|
||||
L.sort()
|
||||
|
||||
L = [WackyList1([WackyComparator(i), i]) for i in range(10)]
|
||||
elem = L[-1]
|
||||
with self.assertRaises(ValueError):
|
||||
[(x,) for x in L].sort()
|
||||
|
||||
# The following test is also by ppperry. It ensures that
|
||||
# unsafe_object_compare handles Py_NotImplemented appropriately.
|
||||
class PointlessComparator:
|
||||
def __lt__(self, other):
|
||||
return NotImplemented
|
||||
L = [PointlessComparator(), PointlessComparator()]
|
||||
self.assertRaises(TypeError, L.sort)
|
||||
self.assertRaises(TypeError, [(x,) for x in L].sort)
|
||||
|
||||
# The following tests go through various types that would trigger
|
||||
# ms->key_compare = unsafe_object_compare
|
||||
lists = [list(range(100)) + [(1<<70)],
|
||||
[str(x) for x in range(100)] + ['\uffff'],
|
||||
[bytes(x) for x in range(100)],
|
||||
[cmp_to_key(lambda x,y: x<y)(x) for x in range(100)]]
|
||||
for L in lists:
|
||||
check_against_PyObject_RichCompareBool(self, L)
|
||||
|
||||
def test_unsafe_latin_compare(self):
|
||||
check_against_PyObject_RichCompareBool(self, [str(x) for
|
||||
x in range(100)])
|
||||
|
||||
def test_unsafe_long_compare(self):
|
||||
check_against_PyObject_RichCompareBool(self, [x for
|
||||
x in range(100)])
|
||||
|
||||
def test_unsafe_float_compare(self):
|
||||
check_against_PyObject_RichCompareBool(self, [float(x) for
|
||||
x in range(100)])
|
||||
|
||||
def test_unsafe_tuple_compare(self):
|
||||
# This test was suggested by Tim Peters. It verifies that the tuple
|
||||
# comparison respects the current tuple compare semantics, which do not
|
||||
# guarantee that x < x <=> (x,) < (x,)
|
||||
#
|
||||
# Note that we don't have to put anything in tuples here, because
|
||||
# the check function does a tuple test automatically.
|
||||
|
||||
check_against_PyObject_RichCompareBool(self, [float('nan')]*100)
|
||||
check_against_PyObject_RichCompareBool(self, [float('nan') for
|
||||
_ in range(100)])
|
||||
#==============================================================================
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
@@ -554,6 +554,7 @@ Tiago Gonçalves
|
||||
Chris Gonnerman
|
||||
Shelley Gooch
|
||||
David Goodger
|
||||
Elliot Gorokhovsky
|
||||
Hans de Graaff
|
||||
Tim Graham
|
||||
Kim Gräsman
|
||||
|
||||
@@ -0,0 +1,2 @@
|
||||
Optimize list.sort() and sorted() by using type specialized comparisons when
|
||||
possible.
|
||||
File diff suppressed because it is too large
Load Diff
@@ -753,3 +753,11 @@ example, with the region of uncertainty B[4], B[5], B[6], there are 4
|
||||
locations: before B[4], between B[4] and B[5], between B[5] and B[6], and
|
||||
after B[6]. In general, across 2**(k-1)-1 elements, there are 2**(k-1)
|
||||
locations. That's why k-1 binary searches are necessary and sufficient.
|
||||
|
||||
OPTIMIZATION OF INDIVIDUAL COMPARISONS
|
||||
As noted above, even the simplest Python comparison triggers a large pile of
|
||||
C-level pointer dereferences, conditionals, and function calls. This can be
|
||||
partially mitigated by pre-scanning the data to determine whether the data is
|
||||
homogenous with respect to type. If so, it is sometimes possible to
|
||||
substitute faster type-specific comparisons for the slower, generic
|
||||
PyObject_RichCompareBool.
|
||||
|
||||
Reference in New Issue
Block a user