diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a821aa9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ + +/target +**/*.rs.bk +Cargo.lock diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000..74c92ea --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,24 @@ +# This file is a template, and might need editing before it works on your project. +# Official language image. Look for the different tagged releases at: +# https://hub.docker.com/r/library/rust/tags/ +image: "rust:latest" + +# Optional: Pick zero or more services to be used on all builds. +# Only needed when using a docker container to run your tests in. +# Check out: http://docs.gitlab.com/ce/ci/docker/using_docker_images.html#what-is-a-service +#services: +# - mysql:latest +# - redis:latest +# - postgres:latest + +# Optional: Install a C compiler, cmake and git into the container. +# You will often need this when you (or any of your dependencies) depends on C code. +#before_script: +#- apt-get update -yqq +#- apt-get install -yqq --no-install-recommends build-essential + +# Use cargo to test the project +test:cargo: + script: + - rustc --version && cargo --version # Print version info for debugging + - cargo test --all --verbose diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..613564f --- /dev/null +++ b/.travis.yml @@ -0,0 +1 @@ +language: rust \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..4de41f8 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,5 @@ +1. Fork this project +2. Create a feature branch (git checkout -b feature-name) +3. Commit your changes (git commit -am 'Cool new feature') +4. Push to the branch (git push origin feature-name) +5. 
Create a new Pull Request diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..1d45b2b --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "clusters" +version = "0.1.0" +authors = ["Alan K "] + +[dependencies] \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..60306c9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2018 Data Structures and Algorithms in Rust + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..2b60632 --- /dev/null +++ b/README.md @@ -0,0 +1,58 @@ +# Clusters + +Useful traits for clustering algorithms. + +This library can be used as a basis for implementing a wide variety of 'distance' based clustering algorithms, such as [DBSCAN][1]. + +## Usage + +The library is written entirely in Rust. To use it in your project, add the latest stable version of this crate to your `Cargo.toml` dependencies. 
+ +**Cargo.toml** + +```toml +[dependencies] +clusters = "0.1" +``` + +In your project, import the library using the following code: + +```rust +extern crate clusters; +use clusters::{Algorithm, Proximity}; +``` + +Note that the above code makes the `Algorithm` and `Proximity` traits available for usage in the given scope, without needing the `clusters::` qualification. For instance: + +```rust +struct DBSCAN { ... } + +impl Algorithm for DBSCAN {...} +``` + +## Examples + +**TODO** + +## Tests + +**TODO** + +## Versioning + +This project uses SemVer for versioning. For the versions available, see the releases tagged on this repository. + +## Authors + +_Primary:_ Alan K @savish + +## License + +This project is licensed under the MIT License - see the LICENSE file for details. + +## Contributing + +Please see [CONTRIBUTING.md][2] for the process of contributing to this repo. + +[1]: https://en.wikipedia.org/wiki/DBSCAN +[2]: ./CONTRIBUTING.md diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..7af1d48 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,68 @@ +/// Functionality to determine the proximity between datapoints +/// +/// This crate provides low level traits specifically for distance based +/// clustering algorithms. However, the 'distance' function result need not +/// correspond to the classical definition of the word. 
/// The 'distance' could be a time difference or any other measurable and
/// comparable difference between two data points.
///
/// `Other` is the type of the datapoint being compared against. It defaults
/// to `Self`, so `impl Proximity for Num` compares one `Num` with another.
//
// NOTE(review): the generic parameter lists in this file were lost when the
// text was extracted (`Other` and `T` were used but never declared, and the
// return types read `Vec>` / `Box>`). They are reconstructed here from the doc
// example and the method signatures -- confirm against the upstream source.
pub trait Proximity<Other = Self> {
    /// The type used to express a 'distance'.
    ///
    /// It must be comparable (`PartialOrd`) so that two distances can be
    /// ordered, and `Copy` so that it can be passed around by value.
    type Output: PartialOrd + Copy;

    /// Returns the 'distance' between this datapoint and another
    ///
    /// # Example
    ///
    /// ```rust
    /// use clusters::Proximity;
    ///
    /// // Create a struct
    /// struct Num(i32);
    ///
    /// // Implement this trait
    /// impl Proximity for Num {
    ///     type Output = i32;
    ///
    ///     fn distance(&self, other: &Num) -> i32 {
    ///         let Num(me) = self;
    ///         let Num(you) = other;
    ///         you - me
    ///     }
    /// }
    /// ```
    fn distance(&self, other: &Other) -> Self::Output;

    /// Returns `true` if the two data points are 'close' to each other
    ///
    /// For this default implementation, 'close' means any distance that is
    /// less than or equal to the `epsilon` parameter value.
    ///
    /// ## Params
    /// - `other` The other datapoint
    /// - `epsilon` The concept of proximity is determined by this parameter
    fn is_near(&self, other: &Other, epsilon: Self::Output) -> bool {
        self.distance(other) <= epsilon
    }
}

/// Functionality for accessing clustered data
///
/// Implementing this trait provides a common interface into clustered data,
/// without restricting the structure of the data to a specific, concrete type.
/// For instance, clustered data could be stored in a `HashMap` or a `Vec<(,)>`
///
/// `T` is the datapoint type held by the clusters.
pub trait Clustered<T> {
    /// Returns a list of clustered datapoints
    ///
    /// Each inner `Vec` holds the datapoints of one cluster.
    fn clusters(&self) -> Vec<Vec<T>>;

    /// Returns a list of all the datapoints that didn't fit into any clusters
    fn noise(&self) -> Vec<T>;
}

/// Base functionality of a clustering algorithm
///
/// `T` is the type of the datapoints being clustered.
pub trait Algorithm<T> {
    /// Cluster the algorithm's data
    ///
    /// To accommodate the various required parameters of different algorithms,
    /// it is expected that this trait is implemented by types that have
    /// access to their requirements. For instance, as fields in a struct.
    ///
    /// ## Params
    /// - `clusterables` The datapoints to be clustered
    ///
    /// ## Returns
    /// A boxed [`Clustered`] value, so that each algorithm is free to choose
    /// its own concrete result type.
    fn cluster(&self, clusterables: &[T]) -> Box<dyn Clustered<T>>;
}