@@ -1220,6 +1220,13 @@ struct s_MergeState {
12201220 * of tuples. It may be set to safe_object_compare, but the idea is that hopefully
12211221 * we can assume more, and use one of the special-case compares. */
12221222 int (* tuple_elem_compare )(PyObject * , PyObject * , MergeState * );
1223+
1224+ /* Used by unsafe_tuple_compare to record whether the tuples' first
1225+ * elements resolved the most recent comparison. If so, the next comparison
1226+ * starts with a method that may avoid PyObject_RichCompareBool() entirely.
1227+ * 0 for false, 1 for true.
1228+ */
1229+ int first_tuple_items_resolved_it ;
12231230};
12241231
12251232/* binarysort is the best method for sorting small arrays: it does
@@ -2190,7 +2197,24 @@ unsafe_float_compare(PyObject *v, PyObject *w, MergeState *ms)
21902197 * using the same pre-sort check as we use for ms->key_compare,
21912198 * but run on the list [x[0] for x in L]. This allows us to optimize compares
21922199 * on two levels (as long as [x[0] for x in L] is type-homogeneous.) The idea is
2193- * that most tuple compares don't involve x[1:]. */
2200+ * that most tuple compares don't involve x[1:].
2201+ * However, that may not be right. When it is right, we can win by calling the
2202+ * relatively cheap ms->tuple_elem_compare on the first pair of elements, to
2203+ * see whether v[0] < w[0] or w[0] < v[0]. If either is so, we're done.
2204+ * Else we proceed as in the tuple compare, comparing the remaining pairs via
2205+ * the probably more expensive PyObject_RichCompareBool(..., Py_EQ) until (if
2206+ * ever) that says "no, not equal!". Then, if we're still on the first pair,
2207+ * ms->tuple_elem_compare can resolve it, else PyObject_RichCompareBool(...,
2208+ * Py_LT) finishes the job.
2209+ * In any case, ms->first_tuple_items_resolved_it keeps track of whether the
2210+ * most recent tuple comparison was resolved by the first pair. If so, the
2211+ * next attempt starts by trying the cheap tests on the first pair again, else
2212+ * PyObject_RichCompareBool(..., Py_EQ) is used from the start.
2213+ * There are cases where PyObject_RichCompareBool(..., Py_EQ) is much cheaper!
2214+ * For example, that can return "almost immediately" if passed the same
2215+ * object twice (it special-cases object identity for Py_EQ), which can,
2216+ * potentially, be unboundedly faster than ms->tuple_elem_compare.
2217+ */
21942218static int
21952219unsafe_tuple_compare (PyObject * v , PyObject * w , MergeState * ms )
21962220{
@@ -2206,26 +2230,52 @@ unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms)
22062230
22072231 vt = (PyTupleObject * )v ;
22082232 wt = (PyTupleObject * )w ;
2233+ i = 0 ;
2234+ if (ms -> first_tuple_items_resolved_it ) {
2235+ /* See whether fast compares of the first elements settle it. */
2236+ k = ms -> tuple_elem_compare (vt -> ob_item [0 ], wt -> ob_item [0 ], ms );
2237+ if (k ) /* error, or v < w */
2238+ return k ;
2239+ k = ms -> tuple_elem_compare (wt -> ob_item [0 ], vt -> ob_item [0 ], ms );
2240+ if (k > 0 ) /* w < v */
2241+ return 0 ;
2242+ if (k < 0 ) /* error */
2243+ return -1 ;
2244+ /* We have
2245+ * not (v[0] < w[0]) and not (w[0] < v[0])
2246+ * which implies, for a total order, that the first elements are
2247+ * equal. So skip them in the loop.
2248+ */
2249+ i = 1 ;
2250+ ms -> first_tuple_items_resolved_it = 0 ;
2251+ }
2252+ /* Now first_tuple_items_resolved_it was 0 on entry, or was forced to 0
2253+ * at the end of the `if` block just above.
2254+ */
2255+ assert (! ms -> first_tuple_items_resolved_it );
22092256
22102257 vlen = Py_SIZE (vt );
22112258 wlen = Py_SIZE (wt );
2212-
2213- for (i = 0 ; i < vlen && i < wlen ; i ++ ) {
2259+ for (; i < vlen && i < wlen ; i ++ ) {
22142260 k = PyObject_RichCompareBool (vt -> ob_item [i ], wt -> ob_item [i ], Py_EQ );
2261+ if (!k ) { /* not equal */
2262+ if (i ) {
2263+ return PyObject_RichCompareBool (vt -> ob_item [i ], wt -> ob_item [i ],
2264+ Py_LT );
2265+ }
2266+ else {
2267+ ms -> first_tuple_items_resolved_it = 1 ;
2268+ return ms -> tuple_elem_compare (vt -> ob_item [0 ], wt -> ob_item [0 ],
2269+ ms );
2270+ }
2271+ }
22152272 if (k < 0 )
22162273 return -1 ;
2217- if (!k )
2218- break ;
22192274 }
2275+ /* all equal until we fell off the end */
2276+ return vlen < wlen ;
22202277
2221- if (i >= vlen || i >= wlen )
2222- return vlen < wlen ;
2223-
2224- if (i == 0 )
2225- return ms -> tuple_elem_compare (vt -> ob_item [i ], wt -> ob_item [i ], ms );
2226- else
2227- return PyObject_RichCompareBool (vt -> ob_item [i ], wt -> ob_item [i ], Py_LT );
2228- }
2278+ }
22292279
22302280/* An adaptive, stable, natural mergesort. See listsort.txt.
22312281 * Returns Py_None on success, NULL on error. Even in case of error, the
@@ -2408,6 +2458,7 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
24082458 }
24092459
24102460 ms .key_compare = unsafe_tuple_compare ;
2461+ ms .first_tuple_items_resolved_it = 1 ; /* be optimistic */
24112462 }
24122463 }
24132464 /* End of pre-sort check: ms is now set properly! */
0 commit comments