Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Exponential Search algorithm to searches module #12645

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
83 changes: 83 additions & 0 deletions data_structures/disjoint_set/union_find.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""
Union-Find (Disjoint Set Union) with Path Compression and Union by Rank

Use Case:
- Efficient structure to manage disjoint sets
- Useful in network connectivity, Kruskal's MST, and clustering

Time Complexity:
- Nearly constant: O(α(n)) where α is the inverse Ackermann function

Author: Michael Alexander Montoya
"""


class UnionFind:
    """
    Disjoint-set forest with path compression and union by rank.

    Elements are the integers ``0 .. size - 1``. ``parent[i]`` holds the parent
    of element ``i`` (a root is its own parent) and ``rank[i]`` is the rank used
    to decide which root survives a union.
    """

    def __init__(self, size: int) -> None:
        """
        Initializes a Union-Find data structure with `size` elements.

        Every element starts out as the root of its own singleton set.

        Raises:
            ValueError: if `size` is negative.

        >>> uf = UnionFind(5)
        >>> uf.find(0)
        0
        >>> UnionFind(-1)
        Traceback (most recent call last):
            ...
        ValueError: size must be non-negative
        """
        if size < 0:
            raise ValueError("size must be non-negative")
        self.parent = list(range(size))
        self.rank = [0] * size

    def find(self, node: int) -> int:
        """
        Finds the representative/root of the set that `node` belongs to.

        Every node visited on the way to the root is re-pointed directly at
        the root (path compression).

        Raises:
            IndexError: if `node` is not in ``0 .. size - 1``.

        >>> uf = UnionFind(5)
        >>> uf.find(3)
        3
        >>> uf.find(-1)
        Traceback (most recent call last):
            ...
        IndexError: node -1 is out of range for 5 elements
        """
        # Reject negatives explicitly: Python's negative indexing would
        # otherwise silently alias node -1 to the last element.
        if not 0 <= node < len(self.parent):
            raise IndexError(
                f"node {node} is out of range for {len(self.parent)} elements"
            )

        # First pass: walk up to the root without recursion, so the depth of
        # the parent chain is never limited by the interpreter's stack.
        root = node
        while self.parent[root] != root:
            root = self.parent[root]

        # Second pass: path compression.
        while node != root:
            next_node = self.parent[node]
            self.parent[node] = root
            node = next_node

        return root

    def union(self, node_a: int, node_b: int) -> bool:
        """
        Unites the sets that contain elements `node_a` and `node_b`.

        Returns:
            True if two distinct sets were merged, False if both elements
            were already in the same set.

        Raises:
            IndexError: if either node is out of range (raised by `find`).

        >>> uf = UnionFind(5)
        >>> uf.union(0, 1)
        True
        >>> uf.find(1) == uf.find(0)
        True
        >>> uf.union(0, 1)
        False
        """
        root_a = self.find(node_a)
        root_b = self.find(node_b)

        if root_a == root_b:
            return False  # Already connected

        # Union by rank: make `root_a` the root with the larger rank so the
        # lower-ranked tree is always attached beneath it.
        if self.rank[root_a] < self.rank[root_b]:
            root_a, root_b = root_b, root_a

        self.parent[root_b] = root_a
        # Only a merge of equally ranked trees increases the surviving rank.
        if self.rank[root_a] == self.rank[root_b]:
            self.rank[root_a] += 1

        return True


if __name__ == "__main__":
    import doctest

    doctest.testmod()

    # Demo: among ten elements, join 1-2-3 into one set and 4-5 into another.
    uf = UnionFind(10)
    for first, second in ((1, 2), (2, 3), (4, 5)):
        uf.union(first, second)

    one_and_three = uf.find(1) == uf.find(3)
    print("1 and 3 connected:", one_and_three)  # True
    one_and_five = uf.find(1) == uf.find(5)
    print("1 and 5 connected:", one_and_five)  # False

    # Bridging 3 and 5 merges the two sets built above.
    uf.union(3, 5)

    one_and_five = uf.find(1) == uf.find(5)
    print("1 and 5 connected after union:", one_and_five)  # True
129 changes: 32 additions & 97 deletions searches/exponential_search.py
Original file line number Diff line number Diff line change
@@ -1,113 +1,48 @@
#!/usr/bin/env python3

"""
Pure Python implementation of the exponential search algorithm

For more information, see the Wikipedia page:
https://en.wikipedia.org/wiki/Exponential_search

Time Complexity:
- Best Case: O(1)
- Average/Worst Case: O(log i), where i is the index of the first element >= target

For doctests run the following command:
python3 -m doctest -v exponential_search.py

Use Case:
Efficient for searching in sorted arrays where the target is near the beginning.

For manual testing run:
python3 exponential_search.py

Author: Michael Alexander Montoya
"""

from __future__ import annotations


def _binary_search_range(
    sorted_collection: list[int], item: int, left: int, right: int
) -> int:
    """Recursively binary-search ``sorted_collection[left..right]`` (inclusive).

    Private helper that performs no validation and never reinterprets a
    negative ``right``: an exhausted range such as ``(0, -1)`` simply means
    "not found". Keeping the ``right < 0`` sentinel out of the recursion is
    what prevents the search from restarting on the whole list.

    :return: index of the found item or -1 if the item is not found
    """
    if right < left:
        return -1

    midpoint = left + (right - left) // 2

    if sorted_collection[midpoint] == item:
        return midpoint
    if sorted_collection[midpoint] > item:
        return _binary_search_range(sorted_collection, item, left, midpoint - 1)
    return _binary_search_range(sorted_collection, item, midpoint + 1, right)


def binary_search_by_recursion(
    sorted_collection: list[int], item: int, left: int = 0, right: int = -1
) -> int:
    """Pure implementation of binary search algorithm in Python using recursion

    Be careful: the collection must be ascending sorted, otherwise a
    ValueError is raised.

    :param sorted_collection: some ascending sorted collection with comparable items
    :param item: item value to search
    :param left: starting index for the search
    :param right: ending index for the search (inclusive); a negative value
        means "up to the last element"
    :return: index of the found item or -1 if the item is not found
    :raises ValueError: if sorted_collection is not sorted in ascending order

    Examples:
    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 0, 0, 4)
    0
    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 15, 0, 4)
    4
    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 5, 0, 4)
    1
    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 6, 0, 4)
    -1
    >>> binary_search_by_recursion([0, 5, 7, 10, 15], -1, 0, 4)
    -1
    """
    if right < 0:
        right = len(sorted_collection) - 1
    # Validate once here rather than at every recursion level.
    if list(sorted_collection) != sorted(sorted_collection):
        raise ValueError("sorted_collection must be sorted in ascending order")
    return _binary_search_range(sorted_collection, item, left, right)


def exponential_search(sorted_collection: list[int], item: int) -> int:
    """
    Pure implementation of an exponential search algorithm in Python.
    For more information, refer to:
    https://en.wikipedia.org/wiki/Exponential_search

    Be careful: the collection must be ascending sorted, otherwise a
    ValueError is raised.

    :param sorted_collection: some ascending sorted collection with comparable items
    :param item: item value to search
    :return: index of the found item or -1 if the item is not found
    :raises ValueError: if sorted_collection is not sorted in ascending order

    The search itself takes O(log i) steps where i is the index of the item;
    note that the sortedness validation above it scans the whole collection.

    Examples:
    >>> exponential_search([0, 5, 7, 10, 15], 0)
    0
    >>> exponential_search([0, 5, 7, 10, 15], 15)
    4
    >>> exponential_search([0, 5, 7, 10, 15], 5)
    1
    >>> exponential_search([0, 5, 7, 10, 15], 6)
    -1
    >>> exponential_search([0, 5, 7, 10, 15], -1)
    -1
    >>> exponential_search([], 3)
    -1
    """
    if list(sorted_collection) != sorted(sorted_collection):
        raise ValueError("sorted_collection must be sorted in ascending order")

    # An empty collection cannot contain the item (and has no element 0).
    if not sorted_collection:
        return -1

    if sorted_collection[0] == item:
        return 0

    # Find range for binary search by repeated doubling
    bound = 1
    while bound < len(sorted_collection) and sorted_collection[bound] < item:
        bound *= 2

    # Perform binary search in the found range
    left = bound // 2
    right = min(bound, len(sorted_collection) - 1)
    return _binary_search_range(sorted_collection, item, left, right)


def binary_search(arr: list[int], target: int, left: int, right: int) -> int:
    """
    Iteratively binary-search ``arr[left..right]`` (both bounds inclusive).

    :param arr: collection sorted in ascending order
    :param target: value to search for
    :param left: first index of the range to search
    :param right: last index of the range to search
    :return: index of `target` within the range, or -1 if it is not there

    >>> binary_search([0, 5, 7, 10, 15], 10, 0, 4)
    3
    >>> binary_search([0, 5, 7, 10, 15], 6, 0, 4)
    -1
    >>> binary_search([0, 5, 7, 10, 15], 10, 0, 2)
    -1
    """
    while left <= right:
        mid = (left + right) // 2
        if arr[mid] == target:
            return mid
        elif arr[mid] < target:
            left = mid + 1
        else:
            right = mid - 1
    return -1


if __name__ == "__main__":
    import doctest

    doctest.testmod()

    # Example usage:
    array = [1, 3, 5, 7, 9, 13, 17, 21, 24, 27, 30]
    target = 13
    result = exponential_search(array, target)
    print(f"Target {target} found at index: {result}")

    # Manual testing
    user_input = input("Enter numbers separated by commas: ").strip()
    collection = sorted(int(item) for item in user_input.split(","))
    target = int(input("Enter a number to search for: "))
    # Positional call so it works whatever the parameters are named.
    result = exponential_search(collection, target)
    if result == -1:
        print(f"{target} was not found in {collection}.")
    else:
        print(f"{target} was found at index {result} in {collection}.")
54 changes: 54 additions & 0 deletions searches/reservoir_sampling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""
Reservoir Sampling Algorithm

Use Case:
Efficient for selecting `sample_size` random items from a data stream of unknown size,
or when the entire dataset cannot fit into memory.

Time Complexity:
- O(n), where n is the total number of items
- Space Complexity: O(sample_size)

Author: Michael Alexander Montoya
"""

import random
from typing import Iterable


def reservoir_sampling(stream: Iterable[int], sample_size: int) -> list[int]:
    """
    Performs reservoir sampling on a stream of items.

    The stream is consumed in a single pass and only `sample_size` items are
    held in memory at any time.

    Args:
        stream: An iterable data stream.
        sample_size: Number of items to sample.

    Returns:
        A list containing `sample_size` randomly sampled items from the stream,
        or every item (in stream order) if the stream holds fewer than
        `sample_size` items.

    Raises:
        ValueError: If `sample_size` is negative.

    >>> result = reservoir_sampling(range(1, 1001), 10)
    >>> len(result) == 10
    True
    >>> reservoir_sampling(range(3), 10)
    [0, 1, 2]
    >>> reservoir_sampling(range(3), -1)
    Traceback (most recent call last):
        ...
    ValueError: sample_size must be non-negative
    """
    if sample_size < 0:
        raise ValueError("sample_size must be non-negative")

    reservoir: list[int] = []

    for i, item in enumerate(stream):
        if i < sample_size:
            # Fill phase: the first `sample_size` items are kept as-is.
            reservoir.append(item)
        else:
            # Replacement phase: item number i + 1 lands in the reservoir with
            # probability sample_size / (i + 1), overwriting a random slot.
            j = random.randint(0, i)
            if j < sample_size:
                reservoir[j] = item

    return reservoir


if __name__ == "__main__":
    import doctest

    doctest.testmod()

    # Demo: draw ten items from the integers 1..1000 and show them.
    stream_data = range(1, 1001)
    print(f"Sampled items: {reservoir_sampling(stream_data, 10)}")