//: # CountMin Sketch
import Foundation

/// A single seeded hash function used by one row of the sketch.
///
/// Each row owns an instance with its own `seed`, so one element is sent to
/// unrelated columns in different rows. Nothing is cached: `Hasher` returns the
/// same value for the same input within one run of the program, and remembering
/// every element would cost memory proportional to the number of distinct
/// elements, which is exactly what a sketch is meant to avoid.
private struct Hashing<T> where T: Hashable {
    /// Mixed into the hasher ahead of the element to tell this function apart from its siblings.
    let seed: Int

    /// Returns a non-negative hash of `value` that depends on `seed`.
    func hash(_ value: T) -> Int {
        var hasher = Hasher()
        hasher.combine(seed)
        hasher.combine(value)
        // Clear the sign bit instead of calling `abs`, which traps on `Int.min`.
        return hasher.finalize() & Int.max
    }
}

/// Counts hashable items with the Count-Min Sketch strategy.
///
/// The sketch is a `rows` x `cols` table of counters whose size is fixed at
/// initialisation. Adding an element bumps one counter per row; querying takes
/// the smallest of those counters. Collisions can only inflate a counter, so an
/// estimate is never below the true count, but it may be above it.
public final class CountMinSketch<T> where T: Hashable {
    /// One hash function per row, seeded with the row index.
    private let hashers: [Hashing<T>]
    /// Counter table indexed as `matrix[row][column]`.
    private var matrix: [[UInt64]]
    private let rows: Int
    private let cols: Int

    /// The sum of every `value` passed to `add(element:value:)`.
    public private(set) var count: UInt64 = 0

    /// Creates a sketch with an explicit table size.
    /// - Parameters:
    ///   - rows: the number of hash functions (one counter row each); more rows lower the
    ///     probability that an estimate is far off. Must be positive.
    ///   - cols: the number of counters in each row; more columns shrink the overestimation.
    ///     Must be positive.
    public init(rows: Int, cols: Int) {
        // A sketch without rows cannot answer a query and one without columns cannot store a count.
        precondition(rows > 0, "CountMinSketch requires at least one row")
        precondition(cols > 0, "CountMinSketch requires at least one column")
        self.rows = rows
        self.cols = cols
        hashers = (0..<rows).map { Hashing<T>(seed: $0) }
        matrix = [[UInt64]](repeating: [UInt64](repeating: 0, count: cols), count: rows)
    }

    /// Creates a sketch sized from the desired error bounds.
    ///
    /// Uses `cols = ceil(2 / epsilon)` and `rows = ceil(log2(1 / delta))`. With that sizing, and
    /// assuming the row hashes behave independently, an estimate exceeds the true count by more
    /// than `epsilon * count` with probability at most `delta`.
    /// - Parameters:
    ///   - delta: the accepted probability of an error larger than `epsilon * count`;
    ///     must lie strictly between 0 and 1.
    ///   - epsilon: the accepted error as a fraction of `count`; must be positive.
    public convenience init(delta: CGFloat, epsilon: CGFloat) {
        precondition(delta > 0 && delta < 1, "delta must lie strictly between 0 and 1")
        precondition(epsilon > 0, "epsilon must be positive")
        self.init(
            rows: Int(log2(1 / delta).rounded(.up)),
            cols: Int((2 / epsilon).rounded(.up))
        )
    }

    /// Adds an element to the sketch.
    ///
    /// Touches one counter per row.
    /// - Parameters:
    ///   - element: the element to count.
    ///   - value: how much to increase the element's count by; defaults to one so that a batch
    ///     of occurrences can also be recorded with a single call.
    public func add(element: T, value: UInt64 = 1) {
        count += value
        for row in 0..<rows {
            let col = column(of: element, inRow: row)
            matrix[row][col] += value
        }
    }

    /// Estimates how many times an element was added.
    /// - Parameter element: the element to look up.
    /// - Returns: the smallest counter associated with `element`; never less than the true count.
    public func query(element: T) -> UInt64 {
        // `rows > 0` is guaranteed by `init`, so the sentinel is always replaced.
        var estimate = UInt64.max
        for row in 0..<rows {
            estimate = min(estimate, matrix[row][column(of: element, inRow: row)])
        }
        return estimate
    }

    /// Maps `element` to its counter index within `row`.
    private func column(of element: T, inRow row: Int) -> Int {
        return hashers[row].hash(element) % cols
    }
}
+ } +} + + +//: EXAMPLES +//: Let's create a sketch + +let stream: [Int] = [ + 1,2,3,4,5,5 ,1,23,43,23,4534,345,234,2,3423,234,23,42,453,45,345,23,2,343,45,345,34 +] + +let sketch = CountMinSketch<Int>(rows: 10, cols: 10) + +for element in stream { + sketch.add(element: element) +} + +assert(sketch.count == stream.count) + +print("We have \(sketch.count) elements in the stream") + + +print("The frequency of 1 is \(sketch.query(element: 1))") diff --git a/CountMinSketch/CountMinSketch.playground/contents.xcplayground b/CountMinSketch/CountMinSketch.playground/contents.xcplayground new file mode 100644 index 000000000..fd676d5b4 --- /dev/null +++ b/CountMinSketch/CountMinSketch.playground/contents.xcplayground @@ -0,0 +1,4 @@ +<?xml version="1.0" encoding="UTF-8" standalone="yes"?> +<playground version='5.0' target-platform='osx' display-mode='raw'> + <timeline fileName='timeline.xctimeline'/> +</playground> \ No newline at end of file diff --git a/CountMinSketch/CountMinSketch.playground/playground.xcworkspace/contents.xcworkspacedata b/CountMinSketch/CountMinSketch.playground/playground.xcworkspace/contents.xcworkspacedata new file mode 100644 index 000000000..919434a62 --- /dev/null +++ b/CountMinSketch/CountMinSketch.playground/playground.xcworkspace/contents.xcworkspacedata @@ -0,0 +1,7 @@ +<?xml version="1.0" encoding="UTF-8"?> +<Workspace + version = "1.0"> + <FileRef + location = "self:"> + </FileRef> +</Workspace> diff --git a/CountMinSketch/CountMinSketch.swift b/CountMinSketch/CountMinSketch.swift new file mode 100644 index 000000000..b8e4ece55 --- /dev/null +++ b/CountMinSketch/CountMinSketch.swift @@ -0,0 +1,89 @@ +import Foundation + +/// Private wrapper around Hashing, allowing hash different Hashables and keep their value +private final class Hashing<T> where T: Hashable { + private var map: [T: Int] = [:] + + func hash(_ value: T) -> Int { + if let hash = map[value] { + return hash + } + var hasher = Hasher() + hasher.combine(value) + let newValue = 
/// A single seeded hash function used by one row of the sketch.
///
/// Each row owns an instance with its own `seed`, so one element is sent to
/// unrelated columns in different rows. Nothing is cached: `Hasher` returns the
/// same value for the same input within one run of the program, and remembering
/// every element would cost memory proportional to the number of distinct
/// elements, which is exactly what a sketch is meant to avoid.
private struct Hashing<T> where T: Hashable {
    /// Mixed into the hasher ahead of the element to tell this function apart from its siblings.
    let seed: Int

    /// Returns a non-negative hash of `value` that depends on `seed`.
    func hash(_ value: T) -> Int {
        var hasher = Hasher()
        hasher.combine(seed)
        hasher.combine(value)
        // Clear the sign bit instead of calling `abs`, which traps on `Int.min`.
        return hasher.finalize() & Int.max
    }
}

/// Counts hashable items with the Count-Min Sketch strategy.
///
/// The sketch is a `rows` x `cols` table of counters whose size is fixed at
/// initialisation. Adding an element bumps one counter per row; querying takes
/// the smallest of those counters. Collisions can only inflate a counter, so an
/// estimate is never below the true count, but it may be above it.
public final class CountMinSketch<T> where T: Hashable {
    /// One hash function per row, seeded with the row index.
    private let hashers: [Hashing<T>]
    /// Counter table indexed as `matrix[row][column]`.
    private var matrix: [[UInt64]]
    private let rows: Int
    private let cols: Int

    /// The sum of every `value` passed to `add(element:value:)`.
    public private(set) var count: UInt64 = 0

    /// Creates a sketch with an explicit table size.
    /// - Parameters:
    ///   - rows: the number of hash functions (one counter row each); more rows lower the
    ///     probability that an estimate is far off. Must be positive.
    ///   - cols: the number of counters in each row; more columns shrink the overestimation.
    ///     Must be positive.
    public init(rows: Int, cols: Int) {
        // A sketch without rows cannot answer a query and one without columns cannot store a count.
        precondition(rows > 0, "CountMinSketch requires at least one row")
        precondition(cols > 0, "CountMinSketch requires at least one column")
        self.rows = rows
        self.cols = cols
        hashers = (0..<rows).map { Hashing<T>(seed: $0) }
        matrix = [[UInt64]](repeating: [UInt64](repeating: 0, count: cols), count: rows)
    }

    /// Creates a sketch sized from the desired error bounds.
    ///
    /// Uses `cols = ceil(2 / epsilon)` and `rows = ceil(log2(1 / delta))`. With that sizing, and
    /// assuming the row hashes behave independently, an estimate exceeds the true count by more
    /// than `epsilon * count` with probability at most `delta`.
    /// - Parameters:
    ///   - delta: the accepted probability of an error larger than `epsilon * count`;
    ///     must lie strictly between 0 and 1.
    ///   - epsilon: the accepted error as a fraction of `count`; must be positive.
    public convenience init(delta: CGFloat, epsilon: CGFloat) {
        precondition(delta > 0 && delta < 1, "delta must lie strictly between 0 and 1")
        precondition(epsilon > 0, "epsilon must be positive")
        self.init(
            rows: Int(log2(1 / delta).rounded(.up)),
            cols: Int((2 / epsilon).rounded(.up))
        )
    }

    /// Adds an element to the sketch.
    ///
    /// Touches one counter per row.
    /// - Parameters:
    ///   - element: the element to count.
    ///   - value: how much to increase the element's count by; defaults to one so that a batch
    ///     of occurrences can also be recorded with a single call.
    public func add(element: T, value: UInt64 = 1) {
        count += value
        for row in 0..<rows {
            let col = column(of: element, inRow: row)
            matrix[row][col] += value
        }
    }

    /// Estimates how many times an element was added.
    /// - Parameter element: the element to look up.
    /// - Returns: the smallest counter associated with `element`; never less than the true count.
    public func query(element: T) -> UInt64 {
        // `rows > 0` is guaranteed by `init`, so the sentinel is always replaced.
        var estimate = UInt64.max
        for row in 0..<rows {
            estimate = min(estimate, matrix[row][column(of: element, inRow: row)])
        }
        return estimate
    }

    /// Maps `element` to its counter index within `row`.
    private func column(of element: T, inRow row: Int) -> Int {
        return hashers[row].hash(element) % cols
    }
}
+ } +} diff --git a/CountMinSketch/Images/algorithm.png b/CountMinSketch/Images/algorithm.png new file mode 100644 index 000000000..0296afc81 Binary files /dev/null and b/CountMinSketch/Images/algorithm.png differ diff --git a/CountMinSketch/Images/matrix_def.png b/CountMinSketch/Images/matrix_def.png new file mode 100644 index 000000000..2c76e0c6a Binary files /dev/null and b/CountMinSketch/Images/matrix_def.png differ diff --git a/CountMinSketch/README.markdown b/CountMinSketch/README.markdown new file mode 100644 index 000000000..ee4f3c93c --- /dev/null +++ b/CountMinSketch/README.markdown @@ -0,0 +1,27 @@ +# CountMin Sketch + +#### Explanation about the model (Taken from wikipedia) +In computing, the count–min sketch (CM sketch) is a probabilistic data structure that serves as a frequency table of events in a stream of data. It uses hash functions to map events to frequencies, but unlike a hash table uses only sub-linear space, at the expense of overcounting some events due to collisions. The count–min sketch was invented in 2003 by Graham Cormode and S. Muthu Muthukrishnan and described by them in a [2005 paper](https://www.sciencedirect.com/science/article/abs/pii/S0196677403001913?via%3Dihub). + +The goal of the basic version of the count–min sketch is to consume a stream of events, one at a time, and count the frequency of the different types of events in the stream. At any time, the sketch can be queried for the frequency of a particular event type i from a universe of event types {U}, and will return an estimate of this frequency that is within a certain distance of the true frequency, with a certain probability. + +The actual sketch data structure is a two-dimensional array of w columns and d rows. The parameters w and d are fixed when the sketch is created, and determine the time and space needs and the probability of error when the sketch is queried for a frequency or inner product. 
Associated with each of the d rows is a separate hash function; the hash functions must be pairwise independent. The parameters w and d can be chosen by setting w = ⌈2/ε⌉ and d = ⌈log₂ 1/δ⌉, where the error in answering a query is within an additive factor of ε with probability 1 − δ. +When a new event of type i arrives we update as follows: for each row j of the table, apply the corresponding hash function to obtain a column index k = hj(i). Then increment the value in row j, column k by one. + + + + + +#### Implementation details +1. Memory consumption - We hold a matrix sized according to the probabilistic characteristics the user wishes for; specifically, we will have cols = ⌈2/ε⌉ and rows = ⌈log₂ 1/δ⌉ +2. `add` function - Assuming that applying a hash function takes O(1), as does arithmetic addition, adding an element takes O(⌈log₂ 1/δ⌉) = O(number of rows) +3. `query` - Same as adding - O(⌈log₂ 1/δ⌉) = O(number of rows) + + +#### How is this different from a regular counter +This model allows us to use sublinear space to estimate the frequency of elements in a stream. +While a regular counter has to maintain a mapping from each element to its frequency, this model allows us to use probability and have a smaller memory footprint. +The value of this data structure makes it particularly beneficial for huge data streams where it is not feasible to hold an exact counter for each element, as the stream is potentially endless. + + +*Written by Daniel Bachar* diff --git a/CountMinSketch/Tests/Tests.xcodeproj/project.pbxproj b/CountMinSketch/Tests/Tests.xcodeproj/project.pbxproj new file mode 100644 index 000000000..19edde1d2 --- /dev/null +++ b/CountMinSketch/Tests/Tests.xcodeproj/project.pbxproj @@ -0,0 +1,277 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 46; + objects = { + +/* Begin PBXBuildFile section */ + 006104711F361359007A6F50 /* CountMinSketchTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 006104701F361359007A6F50 /* CountMinSketchTests.swift */; }; + 4FFB2DF12787B1DE00B84DDC /* CountMinSketch.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4FFB2DF02787B1DE00B84DDC /* CountMinSketch.swift */; }; +/* End PBXBuildFile section */ + +/* Begin PBXFileReference section */ + 0061046D1F361359007A6F50 /* Tests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = Tests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; + 006104701F361359007A6F50 /* CountMinSketchTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CountMinSketchTests.swift; sourceTree = "<group>"; }; + 006104721F361359007A6F50 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; }; + 4FFB2DF02787B1DE00B84DDC /* CountMinSketch.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = CountMinSketch.swift; path = ../../CountMinSketch.swift; sourceTree = "<group>"; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 0061046A1F361359007A6F50 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 006104621F36104C007A6F50 = { + isa = PBXGroup; + children = ( + 0061046F1F361359007A6F50 /* Tests */, + 0061046E1F361359007A6F50 /* Products */, + ); + sourceTree = "<group>"; + }; + 0061046E1F361359007A6F50 /* Products */ = { + isa = PBXGroup; + children = ( + 0061046D1F361359007A6F50 /* Tests.xctest */, + ); + name = Products; + sourceTree = "<group>"; + }; + 0061046F1F361359007A6F50 /* Tests */ = 
{ + isa = PBXGroup; + children = ( + 006104701F361359007A6F50 /* CountMinSketchTests.swift */, + 4FFB2DF02787B1DE00B84DDC /* CountMinSketch.swift */, + 006104721F361359007A6F50 /* Info.plist */, + ); + path = Tests; + sourceTree = "<group>"; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 0061046C1F361359007A6F50 /* Tests */ = { + isa = PBXNativeTarget; + buildConfigurationList = 006104731F361359007A6F50 /* Build configuration list for PBXNativeTarget "Tests" */; + buildPhases = ( + 006104691F361359007A6F50 /* Sources */, + 0061046A1F361359007A6F50 /* Frameworks */, + 0061046B1F361359007A6F50 /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = Tests; + productName = Tests; + productReference = 0061046D1F361359007A6F50 /* Tests.xctest */; + productType = "com.apple.product-type.bundle.unit-test"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 006104631F36104C007A6F50 /* Project object */ = { + isa = PBXProject; + attributes = { + LastSwiftUpdateCheck = 0830; + LastUpgradeCheck = 0830; + TargetAttributes = { + 0061046C1F361359007A6F50 = { + CreatedOnToolsVersion = 8.3.3; + LastSwiftMigration = 0900; + ProvisioningStyle = Automatic; + }; + }; + }; + buildConfigurationList = 006104661F36104C007A6F50 /* Build configuration list for PBXProject "Tests" */; + compatibilityVersion = "Xcode 3.2"; + developmentRegion = English; + hasScannedForEncodings = 0; + knownRegions = ( + English, + en, + ); + mainGroup = 006104621F36104C007A6F50; + productRefGroup = 0061046E1F361359007A6F50 /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 0061046C1F361359007A6F50 /* Tests */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXResourcesBuildPhase section */ + 0061046B1F361359007A6F50 /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXResourcesBuildPhase section */ + +/* 
Begin PBXSourcesBuildPhase section */ + 006104691F361359007A6F50 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 4FFB2DF12787B1DE00B84DDC /* CountMinSketch.swift in Sources */, + 006104711F361359007A6F50 /* CountMinSketchTests.swift in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 006104671F36104C007A6F50 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + }; + name = Debug; + }; + 006104681F36104C007A6F50 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + }; + name = Release; + }; + 006104741F361359007A6F50 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + CODE_SIGN_IDENTITY = "-"; + COMBINE_HIDPI_IMAGES = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + 
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + INFOPLIST_FILE = Tests/Info.plist; + LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks @loader_path/../Frameworks"; + MACOSX_DEPLOYMENT_TARGET = 10.12; + MTL_ENABLE_DEBUG_INFO = YES; + ONLY_ACTIVE_ARCH = YES; + PRODUCT_BUNDLE_IDENTIFIER = swift.algorithm.club.Tests; + PRODUCT_NAME = "$(TARGET_NAME)"; + SDKROOT = macosx; + SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; + SWIFT_OPTIMIZATION_LEVEL = "-Onone"; + SWIFT_SWIFT3_OBJC_INFERENCE = On; + SWIFT_VERSION = 4.0; + }; + name = Debug; + }; + 006104751F361359007A6F50 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + CODE_SIGN_IDENTITY = "-"; + COMBINE_HIDPI_IMAGES = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + INFOPLIST_FILE = Tests/Info.plist; + 
LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks @loader_path/../Frameworks"; + MACOSX_DEPLOYMENT_TARGET = 10.12; + MTL_ENABLE_DEBUG_INFO = NO; + PRODUCT_BUNDLE_IDENTIFIER = swift.algorithm.club.Tests; + PRODUCT_NAME = "$(TARGET_NAME)"; + SDKROOT = macosx; + SWIFT_OPTIMIZATION_LEVEL = "-Owholemodule"; + SWIFT_SWIFT3_OBJC_INFERENCE = On; + SWIFT_VERSION = 4.0; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 006104661F36104C007A6F50 /* Build configuration list for PBXProject "Tests" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 006104671F36104C007A6F50 /* Debug */, + 006104681F36104C007A6F50 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 006104731F361359007A6F50 /* Build configuration list for PBXNativeTarget "Tests" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 006104741F361359007A6F50 /* Debug */, + 006104751F361359007A6F50 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 006104631F36104C007A6F50 /* Project object */; +} diff --git a/CountMinSketch/Tests/Tests.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/CountMinSketch/Tests/Tests.xcodeproj/project.xcworkspace/contents.xcworkspacedata new file mode 100644 index 000000000..6c0ea8493 --- /dev/null +++ b/CountMinSketch/Tests/Tests.xcodeproj/project.xcworkspace/contents.xcworkspacedata @@ -0,0 +1,7 @@ +<?xml version="1.0" encoding="UTF-8"?> +<Workspace + version = "1.0"> + <FileRef + location = "self:Tests.xcodeproj"> + </FileRef> +</Workspace> diff --git a/CountMinSketch/Tests/Tests.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/CountMinSketch/Tests/Tests.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist new file mode 100644 index 000000000..18d981003 --- /dev/null +++ 
b/CountMinSketch/Tests/Tests.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist @@ -0,0 +1,8 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> +<dict> + <key>IDEDidComputeMac32BitWarning</key> + <true/> +</dict> +</plist> diff --git a/CountMinSketch/Tests/Tests.xcodeproj/xcshareddata/xcschemes/Tests.xcscheme b/CountMinSketch/Tests/Tests.xcodeproj/xcshareddata/xcschemes/Tests.xcscheme new file mode 100644 index 000000000..a3659d5ab --- /dev/null +++ b/CountMinSketch/Tests/Tests.xcodeproj/xcshareddata/xcschemes/Tests.xcscheme @@ -0,0 +1,56 @@ +<?xml version="1.0" encoding="UTF-8"?> +<Scheme + LastUpgradeVersion = "0830" + version = "1.3"> + <BuildAction + parallelizeBuildables = "YES" + buildImplicitDependencies = "YES"> + </BuildAction> + <TestAction + buildConfiguration = "Debug" + selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB" + selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB" + shouldUseLaunchSchemeArgsEnv = "YES"> + <Testables> + <TestableReference + skipped = "NO"> + <BuildableReference + BuildableIdentifier = "primary" + BlueprintIdentifier = "0061046C1F361359007A6F50" + BuildableName = "Tests.xctest" + BlueprintName = "Tests" + ReferencedContainer = "container:Tests.xcodeproj"> + </BuildableReference> + </TestableReference> + </Testables> + <AdditionalOptions> + </AdditionalOptions> + </TestAction> + <LaunchAction + buildConfiguration = "Debug" + selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB" + selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB" + launchStyle = "0" + useCustomWorkingDirectory = "NO" + ignoresPersistentStateOnLaunch = "NO" + debugDocumentVersioning = "YES" + debugServiceExtension = "internal" + allowLocationSimulation = "YES"> + <AdditionalOptions> + </AdditionalOptions> + </LaunchAction> + <ProfileAction + buildConfiguration 
= "Release" + shouldUseLaunchSchemeArgsEnv = "YES" + savedToolIdentifier = "" + useCustomWorkingDirectory = "NO" + debugDocumentVersioning = "YES"> + </ProfileAction> + <AnalyzeAction + buildConfiguration = "Debug"> + </AnalyzeAction> + <ArchiveAction + buildConfiguration = "Release" + revealArchiveInOrganizer = "YES"> + </ArchiveAction> +</Scheme> diff --git a/CountMinSketch/Tests/Tests/CountMinSketchTests.swift b/CountMinSketch/Tests/Tests/CountMinSketchTests.swift new file mode 100644 index 000000000..c44b14e36 --- /dev/null +++ b/CountMinSketch/Tests/Tests/CountMinSketchTests.swift @@ -0,0 +1,34 @@ +import XCTest + + +class CountMinSketchTests: XCTestCase { + + func testZeroInit() { + let sketch = CountMinSketch<String>(delta: 0.01, epsilon: 0.01) + let elements = ["", "1", "b"] + for element in elements { + XCTAssertEqual(sketch.query(element: element), 0) + } + } + + func testSimpleUsage() { + let sketch = CountMinSketch<String>(delta: 0.01, epsilon: 0.01) + let expectedCount: UInt64 = 1000 + for _ in 0..<expectedCount { + sketch.add(element: "a") + } + + XCTAssertEqual(sketch.query(element: "a"), expectedCount) + XCTAssertEqual(sketch.query(element: "b"), 0) + XCTAssertEqual(sketch.count, expectedCount) + } + + func testIncreas() { + let sketch = CountMinSketch<String>(delta: 0.01, epsilon: 0.01) + sketch.add(element: "a", value: 10) + XCTAssertEqual(sketch.query(element: "a"), 10) + + sketch.add(element: "a", value: 20) + XCTAssertEqual(sketch.query(element: "a"), 30) + } +} diff --git a/CountMinSketch/Tests/Tests/Info.plist b/CountMinSketch/Tests/Tests/Info.plist new file mode 100644 index 000000000..6c6c23c43 --- /dev/null +++ b/CountMinSketch/Tests/Tests/Info.plist @@ -0,0 +1,22 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> +<dict> + <key>CFBundleDevelopmentRegion</key> + <string>en</string> + <key>CFBundleExecutable</key> + 
<string>$(EXECUTABLE_NAME)</string> + <key>CFBundleIdentifier</key> + <string>$(PRODUCT_BUNDLE_IDENTIFIER)</string> + <key>CFBundleInfoDictionaryVersion</key> + <string>6.0</string> + <key>CFBundleName</key> + <string>$(PRODUCT_NAME)</string> + <key>CFBundlePackageType</key> + <string>BNDL</string> + <key>CFBundleShortVersionString</key> + <string>1.0</string> + <key>CFBundleVersion</key> + <string>1</string> +</dict> +</plist>