From d494ebc8f42f13602a7e66d489ad15b90aa64a8c Mon Sep 17 00:00:00 2001 From: burdoto-home Date: Mon, 29 Oct 2018 19:54:43 +0100 Subject: [PATCH] Implemented referents for the extraction This commit works on Issue https://github.com/xdrop/fuzzywuzzy/issues/44 --- src/me/xdrop/fuzzywuzzy/Extractor.java | 88 ++++- src/me/xdrop/fuzzywuzzy/FuzzySearch.java | 335 ++++++++++++++++-- src/me/xdrop/fuzzywuzzy/ToStringFunction.java | 27 ++ .../fuzzywuzzy/model/ExtractedResult.java | 15 +- 4 files changed, 412 insertions(+), 53 deletions(-) create mode 100644 src/me/xdrop/fuzzywuzzy/ToStringFunction.java diff --git a/src/me/xdrop/fuzzywuzzy/Extractor.java b/src/me/xdrop/fuzzywuzzy/Extractor.java index 8fd5904..646e6c1 100644 --- a/src/me/xdrop/fuzzywuzzy/Extractor.java +++ b/src/me/xdrop/fuzzywuzzy/Extractor.java @@ -34,17 +34,34 @@ public Extractor with(int cutoff) { * @param func The function to apply * @return The list of results */ - public List extractWithoutOrder(String query, Collection choices, Applicable func) { + public List> extractWithoutOrder(String query, Collection choices, + Applicable func) { + return extractWithoutOrder(query, choices, ToStringFunction.DEFAULT, func); + } + + /** + * Returns the list of choices with their associated scores of similarity in a list + * of {@link ExtractedResult} + * + * @param query The query string + * @param choices The list of choices + * @param toStringFunction The ToStringFunction to be applied to all choices. + * @param func The function to apply + * @return The list of results + */ + public List> extractWithoutOrder(String query, Collection choices, + ToStringFunction toStringFunction, Applicable func) { - List yields = new ArrayList<>(); + List> yields = new ArrayList<>(); int index = 0; - for (String s : choices) { + for (T t : choices) { + String s = toStringFunction.apply(t); int score = func.apply(query, s); if (score >= cutoff) { - yields.add(new ExtractedResult(s, score, index)); + yields.add(new ExtractedResult<>(t, s, score, index)); } index++; } @@ -57,13 +74,27 @@ public List extractWithoutOrder(String query, Collection choice, Applicable func) { + public ExtractedResult extractOne(String query, Collection choices, Applicable func) { + return extractOne(query, choices, ToStringFunction.DEFAULT, func); + } - List extracted = extractWithoutOrder(query, choice, func); + /** + * Find the single best match above a score in a list of choices. + * + * @param query A string to match against + * @param choices A list of choices + * @param toStringFunction The ToStringFunction to be applied to all choices. + * @param func Scoring function + * @return An object containing the best match and it's score + */ + public ExtractedResult extractOne(String query, Collection choices, ToStringFunction toStringFunction, + Applicable func) { + + List> extracted = extractWithoutOrder(query, choices, toStringFunction, func); return Collections.max(extracted); @@ -78,10 +109,25 @@ public ExtractedResult extractOne(String query, Collection choice, Appli * @param func The scoring function * @return A list of the results */ - public List extractTop(String query, Collection choices, Applicable func) { + public List> extractTop(String query, Collection choices, Applicable func) { + return extractTop(query, choices, ToStringFunction.DEFAULT, func); + } - List best = extractWithoutOrder(query, choices, func); - Collections.sort(best, Collections.reverseOrder()); + /** + * Creates a sorted list of {@link ExtractedResult} which contain the + * top @param limit most similar choices + * + * @param query The query string + * @param choices A list of choices + * @param toStringFunction The ToStringFunction to be applied to all choices. + * @param func The scoring function + * @return A list of the results + */ + public List> extractTop(String query, Collection choices, + ToStringFunction toStringFunction, Applicable func) { + + List> best = extractWithoutOrder(query, choices, toStringFunction, func); + Collections.sort(best, Collections.>reverseOrder()); return best; } @@ -96,11 +142,27 @@ public List extractTop(String query, Collection choices * the search (k-top heap sort) is used * @return A list of the results */ - public List extractTop(String query, Collection choices, Applicable func, int limit) { + public List> extractTop(String query, Collection choices, Applicable func, int limit) { + return extractTop(query, choices, ToStringFunction.DEFAULT, func, limit); + } + + /** + * Creates a sorted list of {@link ExtractedResult} which contain the + * top @param limit most similar choices + * + * @param query The query string + * @param choices A list of choices + * @param toStringFunction The ToStringFunction to be applied to all choices. + * @param limit Limits the number of results and speeds up + * the search (k-top heap sort) is used + * @return A list of the results + */ + public List> extractTop(String query, Collection choices, + ToStringFunction toStringFunction, Applicable func, int limit) { - List best = extractWithoutOrder(query, choices, func); + List> best = extractWithoutOrder(query, choices, toStringFunction, func); - List results = Utils.findTopKHeap(best, limit); + List> results = Utils.findTopKHeap(best, limit); Collections.reverse(results); return results; diff --git a/src/me/xdrop/fuzzywuzzy/FuzzySearch.java b/src/me/xdrop/fuzzywuzzy/FuzzySearch.java index 12c83ef..2258da1 100644 --- a/src/me/xdrop/fuzzywuzzy/FuzzySearch.java +++ b/src/me/xdrop/fuzzywuzzy/FuzzySearch.java @@ -249,11 +249,11 @@ public static int weightedRatio(String s1, String s2, StringProcessor stringProc * @param func The scoring function * @return A list of the results */ - public static List extractTop(String query, Collection choices, - Applicable func, int limit, int cutoff) { + public static List> extractTop(String query, Collection choices, + Applicable func, int limit, int cutoff) { Extractor extractor = new Extractor(cutoff); - return extractor.extractTop(query, choices, func, limit); + return extractor.extractTop(query, choices, ToStringFunction.DEFAULT, func, limit); } @@ -268,11 +268,11 @@ public static List extractTop(String query, Collection * @param cutoff Rejects any entries with score below this * @return A list of the results */ - public static List extractTop(String query, Collection choices, - int limit, int cutoff) { + public static List> extractTop(String query, Collection choices, + int limit, int cutoff) { Extractor extractor = new Extractor(cutoff); - return extractor.extractTop(query, choices, new WeightedRatio(), limit); + return extractor.extractTop(query, choices, ToStringFunction.DEFAULT, new WeightedRatio(), limit); } @@ -286,12 +286,12 @@ public static List extractTop(String query, Collection * @param limit The number of results to return * @return A list of the results */ - public static List extractTop(String query, Collection choices, - Applicable func, int limit) { + public static List> extractTop(String query, Collection choices, + Applicable func, int limit) { Extractor extractor = new Extractor(); - return extractor.extractTop(query, choices, func, limit); + return extractor.extractTop(query, choices, ToStringFunction.DEFAULT, func, limit); } @@ -304,12 +304,12 @@ public static List extractTop(String query, Collection * @param limit The number of results to return * @return A list of the results */ - public static List extractTop(String query, Collection choices, - int limit) { + public static List> extractTop(String query, Collection choices, + int limit) { Extractor extractor = new Extractor(); - return extractor.extractTop(query, choices, new WeightedRatio(), limit); + return extractor.extractTop(query, choices, ToStringFunction.DEFAULT, new WeightedRatio(), limit); } @@ -322,11 +322,11 @@ public static List extractTop(String query, Collection * @param func The scoring function * @return A list of the results */ - public static List extractSorted(String query, Collection choices, Applicable func) { + public static List> extractSorted(String query, Collection choices, Applicable func) { Extractor extractor = new Extractor(); - return extractor.extractTop(query, choices, func); + return extractor.extractTop(query, choices, ToStringFunction.DEFAULT, func); } @@ -341,12 +341,12 @@ public static List extractSorted(String query, Collection extractSorted(String query, Collection choices, Applicable func, - int cutoff) { + public static List> extractSorted(String query, Collection choices, Applicable func, + int cutoff) { Extractor extractor = new Extractor(cutoff); - return extractor.extractTop(query, choices, func); + return extractor.extractTop(query, choices, ToStringFunction.DEFAULT, func); } @@ -358,11 +358,11 @@ public static List extractSorted(String query, Collection extractSorted(String query, Collection choices) { + public static List> extractSorted(String query, Collection choices) { Extractor extractor = new Extractor(); - return extractor.extractTop(query, choices, new WeightedRatio()); + return extractor.extractTop(query, choices, ToStringFunction.DEFAULT, new WeightedRatio()); } @@ -375,12 +375,12 @@ public static List extractSorted(String query, Collection extractSorted(String query, Collection choices, - int cutoff) { + public static List> extractSorted(String query, Collection choices, + int cutoff) { Extractor extractor = new Extractor(cutoff); - return extractor.extractTop(query, choices, new WeightedRatio()); + return extractor.extractTop(query, choices, ToStringFunction.DEFAULT, new WeightedRatio()); } @@ -393,11 +393,11 @@ public static List extractSorted(String query, Collection extractAll(String query, Collection choices, Applicable func) { + public static List> extractAll(String query, Collection choices, Applicable func) { Extractor extractor = new Extractor(); - return extractor.extractWithoutOrder(query, choices, func); + return extractor.extractWithoutOrder(query, choices, ToStringFunction.DEFAULT, func); } @@ -411,12 +411,12 @@ public static List extractAll(String query, Collection * @param cutoff Keep only scores above cutoff * @return A list of the results */ - public static List extractAll(String query, Collection choices, Applicable func, - int cutoff) { + public static List> extractAll(String query, Collection choices, Applicable func, + int cutoff) { Extractor extractor = new Extractor(cutoff); - return extractor.extractWithoutOrder(query, choices, func); + return extractor.extractWithoutOrder(query, choices, ToStringFunction.DEFAULT, func); } @@ -428,11 +428,11 @@ public static List extractAll(String query, Collection * @param choices A list of choices * @return A list of the results */ - public static List extractAll(String query, Collection choices) { + public static List> extractAll(String query, Collection choices) { Extractor extractor = new Extractor(); - return extractor.extractWithoutOrder(query, choices, new WeightedRatio()); + return extractor.extractWithoutOrder(query, choices, ToStringFunction.DEFAULT, new WeightedRatio()); } @@ -445,11 +445,11 @@ public static List extractAll(String query, Collection * @param cutoff Keep only scores above cutoff * @return A list of the results */ - public static List extractAll(String query, Collection choices, int cutoff) { + public static List> extractAll(String query, Collection choices, int cutoff) { Extractor extractor = new Extractor(cutoff); - return extractor.extractWithoutOrder(query, choices, new WeightedRatio()); + return extractor.extractWithoutOrder(query, choices, ToStringFunction.DEFAULT, new WeightedRatio()); } @@ -465,7 +465,7 @@ public static ExtractedResult extractOne(String query, Collection choice Extractor extractor = new Extractor(); - return extractor.extractOne(query, choices, func); + return extractor.extractOne(query, choices, ToStringFunction.DEFAULT, func); } @@ -480,7 +480,276 @@ public static ExtractedResult extractOne(String query, Collection choice Extractor extractor = new Extractor(); - return extractor.extractOne(query, choices, new WeightedRatio()); + return extractor.extractOne(query, choices, ToStringFunction.DEFAULT, new WeightedRatio()); + + } + + /** + * Creates a sorted list of {@link ExtractedResult} which contain the + * top @param limit most similar choices + * + * @param query The query string + * @param choices A list of choices + * @param toStringFunction The ToStringFunction to be applied to all choices. + * @param func The scoring function + * @return A list of the results + */ + public static List> extractTop(String query, Collection choices, + ToStringFunction toStringFunction, Applicable func, + int limit, int cutoff) { + + Extractor extractor = new Extractor(cutoff); + return extractor.extractTop(query, choices, toStringFunction, func, limit); + + } + + /** + * Creates a sorted list of {@link ExtractedResult} which contain the + * top @param limit most similar choices + * + * @param query The query string + * @param choices A list of choices + * @param toStringFunction The ToStringFunction to be applied to all choices. + * @param limit Limits the number of results and speeds up + * the search (k-top heap sort) is used + * @param cutoff Rejects any entries with score below this + * @return A list of the results + */ + public static List> extractTop(String query, Collection choices, + ToStringFunction toStringFunction, int limit, int cutoff) { + + Extractor extractor = new Extractor(cutoff); + return extractor.extractTop(query, choices, toStringFunction, new WeightedRatio(), limit); + + } + + /** + * Creates a sorted list of {@link ExtractedResult} which contain the + * top @param limit most similar choices + * + * @param query The query string + * @param choices A list of choices + * @param toStringFunction The ToStringFunction to be applied to all choices. + * @param func The scoring function + * @param limit The number of results to return + * @return A list of the results + */ + public static List> extractTop(String query, Collection choices, + ToStringFunction toStringFunction, Applicable func, + int limit) { + + Extractor extractor = new Extractor(); + + return extractor.extractTop(query, choices, toStringFunction, func, limit); + + } + + /** + * Creates a sorted list of {@link ExtractedResult} which contain the + * top @param limit most similar choices + * + * @param query The query string + * @param choices A list of choices + * @param toStringFunction The ToStringFunction to be applied to all choices. + * @param limit The number of results to return + * @return A list of the results + */ + public static List> extractTop(String query, Collection choices, + ToStringFunction toStringFunction, int limit) { + + Extractor extractor = new Extractor(); + + return extractor.extractTop(query, choices, toStringFunction, new WeightedRatio(), limit); + + } + + /** + * Creates a sorted list of {@link ExtractedResult} which contain all the choices + * with their corresponding score where higher is more similar + * + * @param query The query string + * @param choices A list of choices + * @param toStringFunction The ToStringFunction to be applied to all choices. + * @param func The scoring function + * @return A list of the results + */ + public static List> extractSorted(String query, Collection choices, + ToStringFunction toStringFunction, Applicable func) { + + Extractor extractor = new Extractor(); + + return extractor.extractTop(query, choices, toStringFunction, func); + + } + + + /** + * Creates a sorted list of {@link ExtractedResult} which contain all the choices + * with their corresponding score where higher is more similar + * + * @param query The query string + * @param choices A list of choices + * @param toStringFunction The ToStringFunction to be applied to all choices. + * @param func The scoring function + * @param cutoff Keep only scores above cutoff + * @return A list of the results + */ + public static List> extractSorted(String query, Collection choices, + ToStringFunction toStringFunction, Applicable func, + int cutoff) { + + Extractor extractor = new Extractor(cutoff); + + return extractor.extractTop(query, choices, toStringFunction, func); + + } + + /** + * Creates a sorted list of {@link ExtractedResult} which contain all the choices + * with their corresponding score where higher is more similar + * + * @param query The query string + * @param choices A list of choices + * @param toStringFunction The ToStringFunction to be applied to all choices. + * @return A list of the results + */ + public static List> extractSorted(String query, Collection choices, + ToStringFunction toStringFunction) { + + Extractor extractor = new Extractor(); + + return extractor.extractTop(query, choices, toStringFunction, new WeightedRatio()); + + } + + /** + * Creates a sorted list of {@link ExtractedResult} which contain all the choices + * with their corresponding score where higher is more similar + * + * @param query The query string + * @param choices A list of choices + * @param toStringFunction The ToStringFunction to be applied to all choices. + * @param cutoff Keep only scores above cutoff + * @return A list of the results + */ + public static List> extractSorted(String query, Collection choices, + ToStringFunction toStringFunction, int cutoff) { + + Extractor extractor = new Extractor(cutoff); + + return extractor.extractTop(query, choices, toStringFunction, new WeightedRatio()); + + } + + /** + * Creates a list of {@link ExtractedResult} which contain all the choices with + * their corresponding score where higher is more similar + * + * @param query The query string + * @param choices A list of choices + * @param toStringFunction The ToStringFunction to be applied to all choices. + * @param func The scoring function + * @return A list of the results + */ + public static List> extractAll(String query, Collection choices, + ToStringFunction toStringFunction, Applicable func) { + + Extractor extractor = new Extractor(); + + return extractor.extractWithoutOrder(query, choices, toStringFunction, func); + + } + + /** + * Creates a list of {@link ExtractedResult} which contain all the choices with + * their corresponding score where higher is more similar + * + * @param query The query string + * @param choices A list of choices + * @param toStringFunction The ToStringFunction to be applied to all choices. + * @param func The scoring function + * @param cutoff Keep only scores above cutoff + * @return A list of the results + */ + public static List> extractAll(String query, Collection choices, + ToStringFunction toStringFunction, Applicable func, + int cutoff) { + + Extractor extractor = new Extractor(cutoff); + + return extractor.extractWithoutOrder(query, choices, toStringFunction, func); + + } + + /** + * Creates a list of {@link ExtractedResult} which contain all the choices with + * their corresponding score where higher is more similar + * + * @param query The query string + * @param choices A list of choices + * @param toStringFunction The ToStringFunction to be applied to all choices. + * @return A list of the results + */ + public static List> extractAll(String query, Collection choices, + ToStringFunction toStringFunction) { + + Extractor extractor = new Extractor(); + + return extractor.extractWithoutOrder(query, choices, toStringFunction, new WeightedRatio()); + + } + + /** + * Creates a list of {@link ExtractedResult} which contain all the choices with + * their corresponding score where higher is more similar + * + * @param query The query string + * @param choices A list of choices + * @param toStringFunction The ToStringFunction to be applied to all choices. + * @param cutoff Keep only scores above cutoff + * @return A list of the results + */ + public static List> extractAll(String query, Collection choices, + ToStringFunction toStringFunction, int cutoff) { + + Extractor extractor = new Extractor(cutoff); + + return extractor.extractWithoutOrder(query, choices, toStringFunction, new WeightedRatio()); + + } + + /** + * Find the single best match above a score in a list of choices. + * + * @param query A string to match against + * @param choices A list of choices + * @param toStringFunction The ToStringFunction to be applied to all choices. + * @param func Scoring function + * @return An object containing the best match and it's score + */ + public static ExtractedResult extractOne(String query, Collection choices, + ToStringFunction toStringFunction, Applicable func) { + + Extractor extractor = new Extractor(); + + return extractor.extractOne(query, choices, toStringFunction, func); + + } + + /** + * Find the single best match above a score in a list of choices. + * + * @param query A string to match against + * @param choices A list of choices + * @param toStringFunction The ToStringFunction to be applied to all choices. + * @return An object containing the best match and it's score + */ + public static ExtractedResult extractOne(String query, Collection choices, + ToStringFunction toStringFunction) { + + Extractor extractor = new Extractor(); + + return extractor.extractOne(query, choices, toStringFunction, new WeightedRatio()); } diff --git a/src/me/xdrop/fuzzywuzzy/ToStringFunction.java b/src/me/xdrop/fuzzywuzzy/ToStringFunction.java new file mode 100644 index 0000000..51c5de8 --- /dev/null +++ b/src/me/xdrop/fuzzywuzzy/ToStringFunction.java @@ -0,0 +1,27 @@ +package me.xdrop.fuzzywuzzy; + +/** + * Transforms an item of type T to a String. + * + * @param The type of the item to transform. + */ +public interface ToStringFunction { + /** + * Transforms the input item to a string. + * + * @param item The item to transform. + * @return A string to use for comparing the item. + */ + String apply(T item); + + /** + * A default ToStringFunction that returns the input string; + * used by methods that use plain strings in {@link FuzzySearch}. + */ + ToStringFunction DEFAULT = new ToStringFunction() { + @Override + public String apply(String item) { + return item; + } + }; +} diff --git a/src/me/xdrop/fuzzywuzzy/model/ExtractedResult.java b/src/me/xdrop/fuzzywuzzy/model/ExtractedResult.java index 8a26375..8f1690c 100644 --- a/src/me/xdrop/fuzzywuzzy/model/ExtractedResult.java +++ b/src/me/xdrop/fuzzywuzzy/model/ExtractedResult.java @@ -1,17 +1,14 @@ package me.xdrop.fuzzywuzzy.model; -public class ExtractedResult implements Comparable { +public class ExtractedResult implements Comparable> { + private T referent; private String string; private int score; private int index; - public ExtractedResult(String string, int score) { - this.string = string; - this.score = score; - } - - public ExtractedResult(String string, int score, int index) { + public ExtractedResult(T referent, String string, int score, int index) { + this.referent = referent; this.string = string; this.score = score; this.index = index; @@ -22,6 +19,10 @@ public int compareTo(ExtractedResult o) { return Integer.compare(this.getScore(), o.getScore()); } + public T getReferent() { + return referent; + } + public String getString() { return string; }