TheAlgorithms · cureprotocols · Mar 29, 2025 · Mar 29, 2025 · Mar 30, 2025 · Mar 30, 2025
diff --git a/data_structures/disjoint_set/union_find.py b/data_structures/disjoint_set/union_find.py
@@ -0,0 +1,83 @@
+"""
+Union-Find (Disjoint Set Union) with Path Compression and Union by Rank
+
+Use Case:
+- Efficient structure to manage disjoint sets
+- Useful in network connectivity, Kruskal's MST, and clustering
+
+Time Complexity:
+- Amortized O(α(n)) per operation, where α is the inverse Ackermann function
+
+Author: Michael Alexander Montoya
+"""
+
+
+class UnionFind:
+    """Disjoint-set forest over the integers 0..size-1 (union by rank)."""
+
+    def __init__(self, size: int) -> None:
+        """
+        Creates `size` singleton sets, one per element 0..size-1.
+
+        >>> uf = UnionFind(5)
+        >>> uf.find(0)
+        0
+        """
+        self.parent = list(range(size))  # parent[i] == i marks i as a root
+        self.rank = [0] * size  # upper bound on the height of each root's tree
+
+    def find(self, node: int) -> int:
+        """
+        Returns the root of `node`'s set and re-points the walked path at it.
+
+        >>> uf = UnionFind(5)
+        >>> uf.find(3)
+        3
+        """
+        root = node
+        while self.parent[root] != root:  # first pass: climb to the root
+            root = self.parent[root]
+        while self.parent[node] != root:  # second pass: path compression
+            self.parent[node], node = root, self.parent[node]
+        return root
+
+    def union(self, node_a: int, node_b: int) -> bool:
+        """
+        Merges the sets of `node_a` and `node_b`; False if already one set.
+
+        >>> uf = UnionFind(5)
+        >>> uf.union(0, 1)
+        True
+        >>> uf.find(1) == uf.find(0)
+        True
+        >>> uf.union(0, 1)
+        False
+        """
+        root_a, root_b = self.find(node_a), self.find(node_b)
+        if root_a == root_b:
+            return False  # Already connected
+        # Hang the lower-ranked root under the other; a tie keeps root_a on top.
+        if self.rank[root_a] < self.rank[root_b]:
+            root_a, root_b = root_b, root_a
+        self.parent[root_b] = root_a
+        if self.rank[root_a] == self.rank[root_b]:
+            self.rank[root_a] += 1
+        return True
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod()
+
+    # Demo: merge a few of ten singleton sets and query connectivity.
+    demo = UnionFind(10)
+    for left, right in ((1, 2), (2, 3), (4, 5)):
+        demo.union(left, right)
+
+    print("1 and 3 connected:", demo.find(1) == demo.find(3))  # True
+    print("1 and 5 connected:", demo.find(1) == demo.find(5))  # False
+
+    demo.union(3, 5)
+
+    print("1 and 5 connected after union:", demo.find(1) == demo.find(5))  # True
diff --git a/searches/exponential_search.py b/searches/exponential_search.py
@@ -1,113 +1,48 @@
-#!/usr/bin/env python3
-
 """
-Pure Python implementation of exponential search algorithm
+Exponential Search Algorithm
 
-For more information, see the Wikipedia page:
-https://en.wikipedia.org/wiki/Exponential_search
+Time Complexity:
+- Best Case: O(1)
+- Average/Worst Case: O(log i), where i is the index of the first element >= target
 
-For doctests run the following command:
-python3 -m doctest -v exponential_search.py
+Use Case:
+Efficient for searching in sorted arrays where the target is near the beginning.
 
-For manual testing run:
-python3 exponential_search.py
+Author: Michael Alexander Montoya
 """
 
-from __future__ import annotations
-
-
-def binary_search_by_recursion(
-    sorted_collection: list[int], item: int, left: int = 0, right: int = -1
-) -> int:
-    """Pure implementation of binary search algorithm in Python using recursion
-
-    Be careful: the collection must be ascending sorted otherwise, the result will be
-    unpredictable.
 
-    :param sorted_collection: some ascending sorted collection with comparable items
-    :param item: item value to search
-    :param left: starting index for the search
-    :param right: ending index for the search
-    :return: index of the found item or -1 if the item is not found
-
-    Examples:
-    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 0, 0, 4)
-    0
-    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 15, 0, 4)
-    4
-    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 5, 0, 4)
-    1
-    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 6, 0, 4)
-    -1
-    """
-    if right < 0:
-        right = len(sorted_collection) - 1
-    if list(sorted_collection) != sorted(sorted_collection):
-        raise ValueError("sorted_collection must be sorted in ascending order")
-    if right < left:
+def exponential_search(arr, target):
+    if len(arr) == 0:
         return -1
 
-    midpoint = left + (right - left) // 2
-
-    if sorted_collection[midpoint] == item:
-        return midpoint
-    elif sorted_collection[midpoint] > item:
-        return binary_search_by_recursion(sorted_collection, item, left, midpoint - 1)
-    else:
-        return binary_search_by_recursion(sorted_collection, item, midpoint + 1, right)
-
-
-def exponential_search(sorted_collection: list[int], item: int) -> int:
-    """
-    Pure implementation of an exponential search algorithm in Python.
-    For more information, refer to:
-    https://en.wikipedia.org/wiki/Exponential_search
-
-    Be careful: the collection must be ascending sorted, otherwise the result will be
-    unpredictable.
-
-    :param sorted_collection: some ascending sorted collection with comparable items
-    :param item: item value to search
-    :return: index of the found item or -1 if the item is not found
-
-    The time complexity of this algorithm is O(log i) where i is the index of the item.
-
-    Examples:
-    >>> exponential_search([0, 5, 7, 10, 15], 0)
-    0
-    >>> exponential_search([0, 5, 7, 10, 15], 15)
-    4
-    >>> exponential_search([0, 5, 7, 10, 15], 5)
-    1
-    >>> exponential_search([0, 5, 7, 10, 15], 6)
-    -1
-    """
-    if list(sorted_collection) != sorted(sorted_collection):
-        raise ValueError("sorted_collection must be sorted in ascending order")
-
-    if sorted_collection[0] == item:
+    if arr[0] == target:
         return 0
 
-    bound = 1
-    while bound < len(sorted_collection) and sorted_collection[bound] < item:
-        bound *= 2
+    # Double index until it passes the end of arr or arr[index] exceeds target.
+    index = 1
+    while index < len(arr) and arr[index] <= target:
+        index *= 2
 
-    left = bound // 2
-    right = min(bound, len(sorted_collection) - 1)
-    return binary_search_by_recursion(sorted_collection, item, left, right)
+    # Binary-search only the inclusive window [index // 2, min(index, len(arr) - 1)].
+    return binary_search(arr, target, index // 2, min(index, len(arr) - 1))
 
 
-if __name__ == "__main__":
-    import doctest
+def binary_search(arr, target, left, right):
+    """Bisect the inclusive range arr[left..right]; return the index or -1."""
+    while left <= right:
+        mid = (left + right) // 2
+        if arr[mid] == target:
+            return mid
+        # Miss: keep only the half that can still hold target.
+        below = arr[mid] < target
+        left, right = (mid + 1, right) if below else (left, mid - 1)
+    return -1
 
-    doctest.testmod()
 
-    # Manual testing
-    user_input = input("Enter numbers separated by commas: ").strip()
-    collection = sorted(int(item) for item in user_input.split(","))
-    target = int(input("Enter a number to search for: "))
-    result = exponential_search(sorted_collection=collection, item=target)
-    if result == -1:
-        print(f"{target} was not found in {collection}.")
-    else:
-        print(f"{target} was found at index {result} in {collection}.")
+# Example usage:
+if __name__ == "__main__":
+    # Demo input; 13 is stored at index 5.
+    array, target = [1, 3, 5, 7, 9, 13, 17, 21, 24, 27, 30], 13
+    result = exponential_search(array, target)
+    print(f"Target {target} found at index: {result}")
diff --git a/searches/reservoir_sampling.py b/searches/reservoir_sampling.py
@@ -0,0 +1,54 @@
+"""
+Reservoir Sampling Algorithm
+
+Use Case:
+Efficient for selecting `sample_size` random items from a data stream of unknown size,
+or when the entire dataset cannot fit into memory.
+
+Complexity:
+- Time: O(n), where n is the total number of items
+- Space: O(sample_size)
+
+Author: Michael Alexander Montoya
+"""
+
+import random
+from typing import Iterable
+
+
+def reservoir_sampling(stream: Iterable[int], sample_size: int) -> list[int]:
+    """
+    Draws a random sample from `stream` in a single pass.
+
+    Args:
+        stream: Iterable that is walked exactly once, front to back.
+        sample_size: Maximum number of items to keep.
+
+    Returns:
+        Up to `sample_size` items; all of them (in order) if the stream is shorter.
+
+    >>> result = reservoir_sampling(range(1, 1001), 10)
+    >>> len(result) == 10
+    True
+    """
+    sample: list[int] = []
+    for seen, element in enumerate(stream):
+        if seen < sample_size:
+            sample.append(element)  # the first sample_size items fill the list
+            continue
+        # randint is inclusive, so the newcomer lands in one of seen + 1 slots.
+        slot = random.randint(0, seen)
+        if slot < sample_size:
+            sample[slot] = element
+
+    return sample
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod()
+
+    # Demo: draw ten of the integers 1..1000.
+    sample = reservoir_sampling(range(1, 1001), 10)
+    print(f"Sampled items: {sample}")