Skip to content
Open
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 97 additions & 0 deletions data_structures/heap/median_in_a_stream.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import heapq


def signum(a: int, b: int) -> int:
    """
    Compare two integers and report which one is larger.

    Args:
        a: Left-hand value of the comparison
        b: Right-hand value of the comparison

    Returns:
        1 if a > b
        -1 if a < b
        0 if a == b

    >>> signum(5, 3)
    1
    >>> signum(3, 5)
    -1
    >>> signum(4, 4)
    0
    >>> signum(-2, -7)
    1
    """
    if a > b:
        return 1
    if a < b:
        return -1
    return 0


def call_median(
    element: int,
    max_heap: list[int],
    min_heap: list[int],
    median: int,
) -> int:
    """
    Insert an element into the two heaps and return the updated median.

    ``max_heap`` stores the lower half of the stream as negated values, so
    that ``heapq`` (a min-heap) yields the largest of them first, and
    ``min_heap`` stores the upper half unchanged. Both lists are mutated in
    place.

    With an odd number of stored values the median is the top of the larger
    heap; with an even number it is the floor of the mean of the two heap
    tops (integer ``//`` division).

    Args:
        element: Value to insert
        max_heap: Lower half of the values seen so far, stored negated
        min_heap: Upper half of the values seen so far
        median: Median returned by the previous call; when both heaps are
            empty it only decides which heap receives the first element

    Returns:
        Median of all values inserted so far, including ``element``

    >>> lower_half, upper_half = [], []
    >>> call_median(5, lower_half, upper_half, 0)
    5
    >>> call_median(15, lower_half, upper_half, 5)
    10
    >>> call_median(1, lower_half, upper_half, 10)
    5
    >>> call_median(3, lower_half, upper_half, 5)
    4
    >>> sorted(-value for value in lower_half), sorted(upper_half)
    ([1, 3], [5, 15])
    """
    lower_size = len(max_heap)
    upper_size = len(min_heap)

    if lower_size == upper_size:
        # Balanced: whichever heap takes the element becomes the larger
        # one, and its top is the new median.
        if element > median:
            heapq.heappush(min_heap, element)
            median = min_heap[0]
        else:
            heapq.heappush(max_heap, -element)
            median = -max_heap[0]

    elif lower_size > upper_size:
        # Lower half has the extra value: the upper half must grow.
        if element > median:
            heapq.heappush(min_heap, element)
        else:
            # Hand the largest low value over before storing the element.
            heapq.heappush(min_heap, -heapq.heappop(max_heap))
            heapq.heappush(max_heap, -element)
        median = (-max_heap[0] + min_heap[0]) // 2

    else:
        # Upper half has the extra value: the lower half must grow.
        if element > median:
            # Hand the smallest high value over before storing the element.
            heapq.heappush(max_heap, -heapq.heappop(min_heap))
            heapq.heappush(min_heap, element)
        else:
            heapq.heappush(max_heap, -element)
        median = (-max_heap[0] + min_heap[0]) // 2

    return median


def median_in_a_stream(numbers: list[int]) -> list[int]:
    """
    Find the median after each insertion in a stream of integers.

    Uses two heaps and follows the classic running median logic: the lower
    half of the values lives in a max-heap, the upper half in a min-heap,
    and the median is read from the heap tops after every insertion.

    When an even number of values has been seen, the reported median is the
    floor of the mean of the two middle values, so every result is an int.

    Reference:
    https://en.wikipedia.org/wiki/Median#Running_median

    Args:
        numbers: List of integers

    Returns:
        List of medians after each insertion

    Raises:
        ValueError: If the input list is empty

    >>> median_in_a_stream([20, 14, 13, 16, 17])
    [20, 17, 14, 15, 16]
    >>> median_in_a_stream([5, 15, 1, 3])
    [5, 10, 5, 4]
    >>> median_in_a_stream([1, 2])
    [1, 1]
    >>> median_in_a_stream([])
    Traceback (most recent call last):
        ...
    ValueError: Input list must not be empty
    """
    if not numbers:
        raise ValueError("Input list must not be empty")

    max_heap: list[int] = []  # lower half; call_median stores it negated
    min_heap: list[int] = []  # upper half
    # Placeholder only: with both heaps empty the first insertion overwrites
    # it with the first element, whichever heap that element lands in.
    median = 0
    result: list[int] = []

    for element in numbers:
        median = call_median(element, max_heap, min_heap, median)
        result.append(median)

    return result