|
1 | | -from typing import List |
| 1 | +""" |
| 2 | +Smoothsort algorithm implementation. |
2 | 3 |
|
| 4 | +Smoothsort is an adaptive, in-place comparison sort invented by Edsger W. Dijkstra. |
| 5 | +It runs in O(n log n) time in the worst case and approaches O(n) for nearly sorted data. |
| 6 | +It uses a forest of Leonardo heaps to achieve this adaptive behaviour. |
3 | 7 |
|
4 | | -def smoothsort(seq: List[int]) -> List[int]: |
| 8 | +Reference: |
| 9 | + https://en.wikipedia.org/wiki/Smoothsort |
| 10 | + https://www.cs.utexas.edu/~EWD/ewd07xx/EWD796a.PDF |
| 11 | +""" |
| 12 | + |
| 13 | + |
| 14 | +# Precomputed Leonardo numbers: L(0)=1, L(1)=1, L(k)=L(k-1)+L(k-2)+1. |
| 15 | +# The 45 values built below (up to the first one >= 2**31) cover all practical list sizes. |
| 16 | +_LEONARDO: list[int] = [1, 1] |
| 17 | +while _LEONARDO[-1] < 2**31: |
| 18 | + _LEONARDO.append(_LEONARDO[-1] + _LEONARDO[-2] + 1) |
| 19 | + |
| 20 | + |
| 21 | +def _sift(seq: list[int], root: int, order: int) -> None: |
5 | 22 | """ |
6 | | - Smoothsort algorithm (Edsger W. Dijkstra). |
| 23 | + Restore the max-heap property within a Leonardo tree of the given ``order``. |
7 | 24 |
|
8 | | - Adaptive sorting algorithm: O(n log n) worst-case, O(n) for nearly sorted data. |
9 | | - Uses Leonardo heaps to improve performance on nearly sorted lists. |
| 25 | + Sifts ``seq[root]`` downward until the subtree satisfies the Leonardo |
| 26 | + max-heap invariant: every node is >= both of its children. |
| 27 | + Trees of order 0 or 1 are single nodes and already satisfy the invariant. |
10 | 28 |
|
11 | | - Reference: |
12 | | - https://en.wikipedia.org/wiki/Smoothsort |
| 29 | + In a Leonardo tree of order k rooted at index ``root``: |
| 30 | + - the right child root is at ``root - 1`` |
| 31 | + - the left child root is at ``root - 1 - L(k-2)`` |
| 32 | +
|
| 33 | + Args: |
| 34 | + seq: The list being sorted (mutated in-place). |
| 35 | + root: Index of the root of the Leonardo tree to fix. |
| 36 | + order: Leonardo order of the tree rooted at ``root``. |
| 37 | +
|
| 38 | + Examples: |
| 39 | + >>> data = [3, 5, 4] |
| 40 | + >>> _sift(data, 2, 2) |
| 41 | + >>> data |
| 42 | + [3, 4, 5] |
| 43 | +
|
| 44 | + >>> data = [1, 2, 3] |
| 45 | + >>> _sift(data, 2, 2) |
| 46 | + >>> data |
| 47 | + [1, 2, 3] |
| 48 | +
|
| 49 | + >>> data = [7] |
| 50 | + >>> _sift(data, 0, 1) |
| 51 | + >>> data |
| 52 | + [7] |
13 | 53 |
|
14 | | - >>> smoothsort([4, 1, 3, 9, 7]) |
15 | | - [1, 3, 4, 7, 9] |
16 | | - >>> smoothsort([]) |
17 | | - [] |
18 | | - >>> smoothsort([1]) |
19 | | - [1] |
20 | | - >>> smoothsort([5, 4, 3, 2, 1]) |
21 | | - [1, 2, 3, 4, 5] |
22 | | - >>> smoothsort([3, 3, 2, 1, 2]) |
23 | | - [1, 2, 2, 3, 3] |
| 54 | + >>> data = [9, 1, 8, 5, 3] |
| 55 | + >>> _sift(data, 4, 3) |
| 56 | + >>> data |
| 57 | + [3, 1, 9, 5, 8] |
24 | 58 | """ |
| 59 | + while order > 1: |
| 60 | + right = root - 1 # right child root |
| 61 | + left = root - 1 - _LEONARDO[order - 2] # left child root |
25 | 62 |
|
26 | | - # Leonardo numbers for heaps |
27 | | - leonardo: List[int] = [1, 1] |
28 | | - for _ in range(2, 24): |
29 | | - leonardo.append(leonardo[-1] + leonardo[-2] + 1) |
30 | | - |
31 | | - def _sift(start: int, size: int) -> None: |
32 | | - """Restore heap property in a Leonardo heap (internal helper).""" |
33 | | - while size > 1: |
34 | | - r = start - 1 |
35 | | - l = start - 1 - leonardo[size - 2] |
36 | | - if seq[start] < seq[l] or seq[start] < seq[r]: |
37 | | - if seq[l] > seq[r]: |
38 | | - seq[start], seq[l] = seq[l], seq[start] |
39 | | - start = l |
40 | | - size -= 1 |
41 | | - else: |
42 | | - seq[start], seq[r] = seq[r], seq[start] |
43 | | - start = r |
44 | | - size -= 2 |
45 | | - else: |
| 63 | + if seq[left] >= seq[right] and seq[left] > seq[root]: |
| 64 | + seq[root], seq[left] = seq[left], seq[root] |
| 65 | + root = left |
| 66 | + order -= 1 |
| 67 | + elif seq[right] > seq[left] and seq[right] > seq[root]: |
| 68 | + seq[root], seq[right] = seq[right], seq[root] |
| 69 | + root = right |
| 70 | + order -= 2 |
| 71 | + else: |
| 72 | + break |
| 73 | + |
| 74 | + |
| 75 | +def _trinkle( |
| 76 | + seq: list[int], |
| 77 | + pos: int, |
| 78 | + heap_sizes: list[int], |
| 79 | + idx: int, |
| 80 | +) -> None: |
| 81 | + """ |
| 82 | + Restore both the inter-heap root ordering and the intra-heap ordering. |
| 83 | +
|
| 84 | + Walks the value at ``pos`` leftwards through the forest-root chain as |
| 85 | + long as the left-neighbour root is larger, then calls ``_sift`` to fix |
| 86 | + the heap at the final resting position. |
| 87 | +
|
| 88 | + Args: |
| 89 | + seq: The list being sorted (mutated in-place). |
| 90 | + pos: Index of the root being inserted or newly exposed. |
| 91 | + heap_sizes: List of Leonardo orders for the current forest (left to |
| 92 | + right); ``heap_sizes[idx]`` is the order of the tree |
| 93 | + whose root is at ``pos``. |
| 94 | + idx: Position in ``heap_sizes`` for the tree rooted at ``pos``. |
| 95 | +
|
| 96 | + Examples: |
| 97 | + >>> data = [1, 5, 3] |
| 98 | + >>> _trinkle(data, 2, [1, 1], 1) |
| 99 | + >>> data |
| 100 | + [1, 3, 5] |
| 101 | +
|
| 102 | + >>> data = [3, 5, 4] |
| 103 | + >>> _trinkle(data, 2, [2], 0) |
| 104 | + >>> data |
| 105 | + [3, 4, 5] |
| 106 | + """ |
| 107 | + while idx > 0: |
| 108 | + prev_root = pos - _LEONARDO[heap_sizes[idx]] |
| 109 | + if seq[pos] >= seq[prev_root]: |
| 110 | + break |
| 111 | + # Only swap if prev_root is also greater than both children of the |
| 112 | + # current tree; otherwise a child is at least as large and _sift handles it. |
| 113 | + if heap_sizes[idx] > 1: |
| 114 | + right = pos - 1 |
| 115 | + left = pos - 1 - _LEONARDO[heap_sizes[idx] - 2] |
| 116 | + if seq[prev_root] <= seq[right] or seq[prev_root] <= seq[left]: |
46 | 117 | break |
| 118 | + seq[pos], seq[prev_root] = seq[prev_root], seq[pos] |
| 119 | + pos = prev_root |
| 120 | + idx -= 1 |
| 121 | + |
| 122 | + _sift(seq, pos, heap_sizes[idx]) |
47 | 123 |
|
48 | | - # Fallback: sort normally to ensure correctness (main function is tested) |
49 | | - if len(seq) < 2: |
| 124 | + |
| 125 | +def smoothsort(seq: list[int]) -> list[int]: |
| 126 | + """ |
| 127 | + Sort a list in-place using the Smoothsort algorithm and return it. |
| 128 | +
|
| 129 | + Smoothsort (Edsger W. Dijkstra, 1981) is an adaptive, in-place sort |
| 130 | + with O(n log n) worst-case time and O(n) best-case time on already-sorted |
| 131 | + input. It improves on Heapsort by maintaining a forest of Leonardo heaps |
| 132 | + whose structure mirrors the sorted prefix of the sequence. |
| 133 | +
|
| 134 | + Args: |
| 135 | + seq: A list of integers to sort. |
| 136 | +
|
| 137 | + Returns: |
| 138 | + The same list object, sorted in ascending order. |
| 139 | +
|
| 140 | + Examples: |
| 141 | + >>> smoothsort([4, 1, 3, 9, 7]) |
| 142 | + [1, 3, 4, 7, 9] |
| 143 | + >>> smoothsort([]) |
| 144 | + [] |
| 145 | + >>> smoothsort([1]) |
| 146 | + [1] |
| 147 | + >>> smoothsort([5, 4, 3, 2, 1]) |
| 148 | + [1, 2, 3, 4, 5] |
| 149 | + >>> smoothsort([3, 3, 2, 1, 2]) |
| 150 | + [1, 2, 2, 3, 3] |
| 151 | + >>> smoothsort([1, 2, 3, 4, 5]) |
| 152 | + [1, 2, 3, 4, 5] |
| 153 | + >>> smoothsort([-3, 0, -1, 5, 2]) |
| 154 | + [-3, -1, 0, 2, 5] |
| 155 | + """ |
| 156 | + n = len(seq) |
| 157 | + if n < 2: |
50 | 158 | return seq |
51 | 159 |
|
52 | | - seq.sort() |
| 160 | + # ``heap_sizes[i]`` is the Leonardo order of the i-th tree (left to right). |
| 161 | + heap_sizes: list[int] = [] |
| 162 | + |
| 163 | + # ------------------------------------------------------------------ |
| 164 | + # Phase 1 – Build the Leonardo heap forest over seq[0..n-1]. |
| 165 | + # ------------------------------------------------------------------ |
| 166 | + for i in range(n): |
| 167 | + # If the two rightmost trees have consecutive orders, merge them. |
| 168 | + if ( |
| 169 | + len(heap_sizes) >= 2 |
| 170 | + and heap_sizes[-2] == heap_sizes[-1] + 1 |
| 171 | + ): |
| 172 | + heap_sizes.pop() |
| 173 | + heap_sizes[-1] += 1 |
| 174 | + elif heap_sizes and heap_sizes[-1] == 1: |
| 175 | + heap_sizes.append(0) |
| 176 | + else: |
| 177 | + heap_sizes.append(1) |
| 178 | + |
| 179 | + _trinkle(seq, i, heap_sizes, len(heap_sizes) - 1) |
| 180 | + |
| 181 | + # ------------------------------------------------------------------ |
| 182 | + # Phase 2 – Extract maximum elements right-to-left. |
| 183 | + # ------------------------------------------------------------------ |
| 184 | + for i in range(n - 1, -1, -1): |
| 185 | + order = heap_sizes.pop() |
| 186 | + if order > 1: |
| 187 | + # Expose the two child roots and re-trinkle each. |
| 188 | + right_order = order - 2 |
| 189 | + left_order = order - 1 |
| 190 | + right_pos = i - 1 |
| 191 | + left_pos = i - 1 - _LEONARDO[right_order] |
| 192 | + |
| 193 | + heap_sizes.append(left_order) |
| 194 | + _trinkle(seq, left_pos, heap_sizes, len(heap_sizes) - 1) |
| 195 | + |
| 196 | + heap_sizes.append(right_order) |
| 197 | + _trinkle(seq, right_pos, heap_sizes, len(heap_sizes) - 1) |
| 198 | + |
53 | 199 | return seq |
| 200 | + |
| 201 | + |
| 202 | +if __name__ == "__main__": |
| 203 | + import doctest |
| 204 | + import random |
| 205 | + |
| 206 | + results = doctest.testmod(verbose=False) |
| 207 | + assert results.failed == 0, f"{results.failed} doctest(s) failed" |
| 208 | + |
| 209 | + for trial in range(5000): |
| 210 | + sample = random.choices(range(-50, 50), k=random.randint(0, 30)) |
| 211 | + got = smoothsort(sample[:]) |
| 212 | + assert got == sorted(sample), ( |
| 213 | + f"Trial {trial}: smoothsort({sample!r}) -> {got!r}, " |
| 214 | + f"expected {sorted(sample)!r}" |
| 215 | + ) |
| 216 | + |
| 217 | + print("All doctests and 5 000 random trials passed.") |
0 commit comments