From bc5d07ba8bb99982889d348855835fb5dabb624f Mon Sep 17 00:00:00 2001 From: Andersama Date: Sat, 28 Dec 2019 18:56:01 -0800 Subject: [PATCH 1/3] Pattern Analysis Adds functionality to analyze the minimum and maximum # of characters a regex may match. --- include/ctre/evaluation.hpp | 243 +++++++++++++++++++++++++++++++++ include/ctre/find_captures.hpp | 4 +- include/ctre/return_type.hpp | 8 +- 3 files changed, 251 insertions(+), 4 deletions(-) diff --git a/include/ctre/evaluation.hpp b/include/ctre/evaluation.hpp index 017b9097..6317f034 100644 --- a/include/ctre/evaluation.hpp +++ b/include/ctre/evaluation.hpp @@ -447,6 +447,249 @@ constexpr CTRE_FORCE_INLINE R evaluate(const Iterator begin, Iterator current, c // property matching +// pattern analysis - returns the minimum and maximum # of characters in order for a regex to match a string +// a custom std::pair to overload some handy operations that we'll perform w/ a fold +struct analysis_results : std::pair { + // -1 is considered INF, -2 is finite (but perhaps too large to store), all other values are exact counts + static constexpr CTRE_FORCE_INLINE size_t saturate_limit(const size_t& lhs, const size_t& rhs) { + const constexpr size_t inf = size_t{ 0 } -1; + const constexpr size_t lim = size_t{ 0 } -2; + size_t ret = inf; + if (lhs == inf || rhs == inf) { + return ret; + } else { + ret = lhs + rhs; + ret = ret < lhs ? lim : ret == inf ? lim : ret; + } + return ret; + } + + static constexpr CTRE_FORCE_INLINE size_t mult_saturate_limit(const size_t& lhs, const size_t& rhs) { + const constexpr size_t inf = size_t{ 0 } -1; + const constexpr size_t lim = size_t{ 0 } -2; + size_t ret = inf; + if (lhs == inf || rhs == inf) { + return ret; + } else if (lhs == 0 || rhs == 0) { + return ret = 0; + } else { + if (lhs > (SIZE_MAX / rhs)) + return ret = lim; + ret = lhs * rhs; + ret = ret == inf ? lim : ret; + return ret; + } + } + + constexpr inline CTRE_FORCE_INLINE operator bool() const noexcept { + return first; + } + constexpr auto CTRE_FORCE_INLINE operator+(analysis_results other) const noexcept { + return analysis_results{std::make_pair( + saturate_limit(first, other.first), + saturate_limit(second, other.second) + )}; + } + constexpr auto CTRE_FORCE_INLINE operator||(analysis_results other) const noexcept { + return analysis_results{std::make_pair( + std::min(first, other.first), + std::max(second, other.second) + )}; + } +}; + +template +static constexpr auto trampoline_analysis(Pattern) noexcept; + +template +static constexpr auto trampoline_analysis(ctll::list) noexcept; + +template +static constexpr auto trampoline_analysis(T, R captures) noexcept; + +//processing for each type + +//repeat +template +static constexpr auto _analyze(repeat, R captures) noexcept { + analysis_results ret{ std::make_pair(0ULL, 0ULL) }; + if constexpr (sizeof...(Content)) { + ret = trampoline_analysis(ctll::list(), captures); + ret.first = analysis_results::mult_saturate_limit(ret.first, A); + ret.second = analysis_results::mult_saturate_limit(ret.second, B); + } + return ret; +} + +//note: all * ? + operations are specialized variations of repeat {A,B} +//lazy_repeat +template +static constexpr auto _analyze(lazy_repeat, R captures) noexcept { + return _analyze(repeat(), captures); +} + +//possessive_repeat +template +static constexpr auto _analyze(possessive_repeat, R captures) noexcept { + return _analyze(repeat(), captures); +} + +//star +template +static constexpr auto _analyze(star, R captures) noexcept { + return _analyze(repeat<0ULL, ~(0ULL), Content...>(), captures); +} + +//lazy_star +template +static constexpr auto _analyze(lazy_star, R captures) noexcept { + return _analyze(repeat<0ULL, ~(0ULL), Content...>(), captures); +} + +//possessive_star +template +static constexpr auto _analyze(possessive_star, R captures) noexcept { + return _analyze(repeat<0ULL, ~(0ULL), Content...>(), captures); +} + +//plus +template +static constexpr auto _analyze(plus, R captures) noexcept { + return _analyze(repeat<1ULL, ~(0ULL), Content...>(), captures); +} + +//lazy_plus +template +static constexpr auto _analyze(lazy_star, R captures) noexcept { + return _analyze(repeat<1ULL, ~(0ULL), Content...>(), captures); +} + +//possessive_plus +template +static constexpr auto _analyze(possessive_star, R captures) noexcept { + return _analyze(repeat<1ULL, ~(0ULL), Content...>(), captures); +} + +//optional +template +static constexpr auto _analyze(optional, R captures) noexcept { + return _analyze(repeat<0ULL, 1ULL, Content...>(), captures); +} + +//lazy_optional +template +static constexpr auto _analyze(lazy_optional, R captures) noexcept { + return _analyze(repeat<0ULL, 1ULL, Content...>(), captures); +} + +//back_reference +template +static constexpr auto _analyze(back_reference, R captures) noexcept { + const auto ref = captures.template get(); + analysis_results ret{ std::make_pair(0ULL, 0ULL) }; + if constexpr (size(ref.get_expression())) { + ret = trampoline_analysis(ref.get_expression(), captures); + } + return ret; +} + +//back_reference_with_name +template +static constexpr auto _analyze(back_reference_with_name, R captures) noexcept { + const auto ref = captures.template get(); + analysis_results ret{ std::make_pair(0ULL, 0ULL) }; + if constexpr (size(ref.get_expression())) { + ret = trampoline_analysis(ref.get_expression(), captures); + } + return ret; +} + +//select, this is specialized, we need to take the minimum of all minimums and maximum of all maximums +template +static constexpr auto _analyze(select, R captures) noexcept { + analysis_results ret = trampoline_select_analysis(ctll::list(), captures); + return ret; +} + +//character, any character contributes exactly one to both counts +template +static constexpr auto _analyze(character, R captures) noexcept { + analysis_results ret{ std::make_pair(1ULL, 1ULL) }; + return ret; +} + +//strings, any string contributes the # of characters it contains (if we have an empty string that'll be 0) +template +static constexpr auto _analyze(string, R captures) noexcept { + analysis_results ret{ std::make_pair(sizeof...(Str), sizeof...(Str)) }; + return ret; +} + +//we'll process anything that has contents as a regex +//ctll::list +template +static constexpr auto _analyze(ctll::list,R captures) noexcept { + analysis_results ret = trampoline_analysis(ctll::list(), captures); + return ret; +} + +//sequence +template +static constexpr auto _analyze(sequence, R captures) noexcept { + analysis_results ret = trampoline_analysis(ctll::list(), captures); + return ret; +} + +//capture +template +static constexpr auto _analyze(capture, R captures) noexcept { + analysis_results ret = trampoline_analysis(ctll::list(), captures); + return ret; +} + +//capture_with_name +template +static constexpr auto _analyze(capture_with_name, R captures) noexcept { + analysis_results ret = trampoline_analysis(ctll::list(), captures); + return ret; +} + +//everything else, anything we haven't matched already isn't supported and will contribute 0 +template +static constexpr auto _analyze(T, R captures) noexcept { + analysis_results ret{ std::make_pair(0ULL, 0ULL) }; + return ret; +} +//note: ctll::list wraps patterns just like sequences, we'll treat anything that looks like a regex w/ ctll::list +template +static constexpr auto trampoline_analysis(ctll::list, R captures) noexcept { + //fold, for every argument in a ctll::list, calculate its contribution to the limits + auto r = ((_analyze(Patterns(), captures)) + ...); + //note any reordering of parameters will result in the same limits + return r; +} + +template +static constexpr auto trampoline_select_analysis(ctll::list, R captures) noexcept { + //fold, each argument in a selection of regexes we take the minimum and maximum of all values + auto r = ((trampoline_analysis(Patterns(), captures)) || ...); + //note again, order is unimportant + return r; +} + +template +static constexpr auto pattern_analysis(ctll::list) noexcept { + using return_type = decltype(regex_results(std::declval::iterator>(), find_captures(pattern))); + return trampoline_analysis(ctll::list(), return_type{}); +} + +template +static constexpr auto pattern_analysis(Pattern pattern = {}) noexcept { + using return_type = decltype(regex_results(std::declval::iterator>(), find_captures(pattern))); + return trampoline_analysis(ctll::list(), return_type{}); +} + + } #endif diff --git a/include/ctre/find_captures.hpp b/include/ctre/find_captures.hpp index 338d266f..6e914bc8 100644 --- a/include/ctre/find_captures.hpp +++ b/include/ctre/find_captures.hpp @@ -112,12 +112,12 @@ template constexpr auto template constexpr auto find_captures(ctll::list, Tail...>, ctll::list) noexcept { - return find_captures(ctll::list(), ctll::list>()); + return find_captures(ctll::list(), ctll::list>>()); } template constexpr auto find_captures(ctll::list, Tail...>, ctll::list) noexcept { - return find_captures(ctll::list(), ctll::list>()); + return find_captures(ctll::list(), ctll::list>>()); } diff --git a/include/ctre/return_type.hpp b/include/ctre/return_type.hpp index 7a5b6458..2d2de586 100644 --- a/include/ctre/return_type.hpp +++ b/include/ctre/return_type.hpp @@ -13,7 +13,7 @@ struct not_matched_tag_t { }; static constexpr inline auto not_matched = not_matched_tag_t{}; -template struct captured_content { +template struct captured_content { template class storage { Iterator _begin{}; Iterator _end{}; @@ -21,7 +21,7 @@ template struct captured_content { bool _matched{false}; public: using char_type = typename std::iterator_traits::value_type; - + using content_type = Content; using name = Name; constexpr CTRE_FORCE_INLINE storage() noexcept {} @@ -86,6 +86,10 @@ template struct captured_content { constexpr CTRE_FORCE_INLINE static size_t get_id() noexcept { return Id; } + + constexpr CTRE_FORCE_INLINE static content_type get_expression() noexcept { + return {}; + } }; }; From d44f1c84e19a07cf4708b6f0bd286915d862b745 Mon Sep 17 00:00:00 2001 From: Andersama Date: Sun, 29 Dec 2019 16:23:50 -0800 Subject: [PATCH 2/3] fix constexpr and issue with select --- include/ctre/evaluation.hpp | 137 +++++++++++++++++++----------------- 1 file changed, 74 insertions(+), 63 deletions(-) diff --git a/include/ctre/evaluation.hpp b/include/ctre/evaluation.hpp index 6317f034..a0039063 100644 --- a/include/ctre/evaluation.hpp +++ b/include/ctre/evaluation.hpp @@ -18,6 +18,10 @@ namespace ctre { +struct analysis_results; +template, typename R = ctll::list<>> +static constexpr analysis_results trampoline_analysis(T, R captures) noexcept; + // calling with pattern prepare stack and triplet of iterators template constexpr inline auto match_re(const Iterator begin, const EndIterator end, Pattern pattern) noexcept { @@ -457,7 +461,8 @@ struct analysis_results : std::pair { size_t ret = inf; if (lhs == inf || rhs == inf) { return ret; - } else { + } + else { ret = lhs + rhs; ret = ret < lhs ? lim : ret == inf ? lim : ret; } @@ -470,9 +475,11 @@ struct analysis_results : std::pair { size_t ret = inf; if (lhs == inf || rhs == inf) { return ret; - } else if (lhs == 0 || rhs == 0) { + } + else if (lhs == 0 || rhs == 0) { return ret = 0; - } else { + } + else { if (lhs > (SIZE_MAX / rhs)) return ret = lim; ret = lhs * rhs; @@ -480,38 +487,43 @@ struct analysis_results : std::pair { return ret; } } - + constexpr inline CTRE_FORCE_INLINE operator bool() const noexcept { - return first; + return first > 0; } - constexpr auto CTRE_FORCE_INLINE operator+(analysis_results other) const noexcept { - return analysis_results{std::make_pair( - saturate_limit(first, other.first), - saturate_limit(second, other.second) - )}; + friend constexpr auto CTRE_FORCE_INLINE operator+(const analysis_results &lhs, const analysis_results &other) noexcept { + return analysis_results{ std::make_pair( + saturate_limit(lhs.first, other.first), + saturate_limit(lhs.second, other.second) + ) }; } - constexpr auto CTRE_FORCE_INLINE operator||(analysis_results other) const noexcept { - return analysis_results{std::make_pair( - std::min(first, other.first), - std::max(second, other.second) + friend constexpr auto CTRE_FORCE_INLINE operator||(const analysis_results& lhs, const analysis_results& other) noexcept { + return analysis_results{ std::make_pair( + std::min(lhs.first, other.first), + std::max(lhs.second, other.second) )}; } + constexpr analysis_results& operator =(const analysis_results& rhs) { + first = rhs.first; + second = rhs.second; + return *this; + } }; -template -static constexpr auto trampoline_analysis(Pattern) noexcept; - -template -static constexpr auto trampoline_analysis(ctll::list) noexcept; +//processing for each type template -static constexpr auto trampoline_analysis(T, R captures) noexcept; +static constexpr analysis_results _analyze(T, R captures) noexcept; -//processing for each type +template +static constexpr auto pattern_analysis(ctll::list) noexcept; + +template +static constexpr auto pattern_analysis(Pattern pattern = {}) noexcept; //repeat template -static constexpr auto _analyze(repeat, R captures) noexcept { +static constexpr analysis_results _analyze(repeat, R captures) noexcept { analysis_results ret{ std::make_pair(0ULL, 0ULL) }; if constexpr (sizeof...(Content)) { ret = trampoline_analysis(ctll::list(), captures); @@ -524,67 +536,67 @@ static constexpr auto _analyze(repeat, R captures) noexcept { //note: all * ? + operations are specialized variations of repeat {A,B} //lazy_repeat template -static constexpr auto _analyze(lazy_repeat, R captures) noexcept { +static constexpr analysis_results _analyze(lazy_repeat, R captures) noexcept { return _analyze(repeat(), captures); } //possessive_repeat template -static constexpr auto _analyze(possessive_repeat, R captures) noexcept { +static constexpr analysis_results _analyze(possessive_repeat, R captures) noexcept { return _analyze(repeat(), captures); } //star template -static constexpr auto _analyze(star, R captures) noexcept { +static constexpr analysis_results _analyze(star, R captures) noexcept { return _analyze(repeat<0ULL, ~(0ULL), Content...>(), captures); } //lazy_star template -static constexpr auto _analyze(lazy_star, R captures) noexcept { +static constexpr analysis_results _analyze(lazy_star, R captures) noexcept { return _analyze(repeat<0ULL, ~(0ULL), Content...>(), captures); } //possessive_star template -static constexpr auto _analyze(possessive_star, R captures) noexcept { +static constexpr analysis_results _analyze(possessive_star, R captures) noexcept { return _analyze(repeat<0ULL, ~(0ULL), Content...>(), captures); } //plus template -static constexpr auto _analyze(plus, R captures) noexcept { +static constexpr analysis_results _analyze(plus, R captures) noexcept { return _analyze(repeat<1ULL, ~(0ULL), Content...>(), captures); } //lazy_plus template -static constexpr auto _analyze(lazy_star, R captures) noexcept { +static constexpr analysis_results _analyze(lazy_plus, R captures) noexcept { return _analyze(repeat<1ULL, ~(0ULL), Content...>(), captures); } //possessive_plus template -static constexpr auto _analyze(possessive_star, R captures) noexcept { +static constexpr analysis_results _analyze(possessive_plus, R captures) noexcept { return _analyze(repeat<1ULL, ~(0ULL), Content...>(), captures); } //optional template -static constexpr auto _analyze(optional, R captures) noexcept { +static constexpr analysis_results _analyze(optional, R captures) noexcept { return _analyze(repeat<0ULL, 1ULL, Content...>(), captures); } //lazy_optional template -static constexpr auto _analyze(lazy_optional, R captures) noexcept { +static constexpr analysis_results _analyze(lazy_optional, R captures) noexcept { return _analyze(repeat<0ULL, 1ULL, Content...>(), captures); } //back_reference template -static constexpr auto _analyze(back_reference, R captures) noexcept { +static constexpr analysis_results _analyze(back_reference, R captures) noexcept { const auto ref = captures.template get(); analysis_results ret{ std::make_pair(0ULL, 0ULL) }; if constexpr (size(ref.get_expression())) { @@ -595,7 +607,7 @@ static constexpr auto _analyze(back_reference, R captures) noexcept { //back_reference_with_name template -static constexpr auto _analyze(back_reference_with_name, R captures) noexcept { +static constexpr analysis_results _analyze(back_reference_with_name, R captures) noexcept { const auto ref = captures.template get(); analysis_results ret{ std::make_pair(0ULL, 0ULL) }; if constexpr (size(ref.get_expression())) { @@ -604,23 +616,16 @@ static constexpr auto _analyze(back_reference_with_name, R captures) noexc return ret; } -//select, this is specialized, we need to take the minimum of all minimums and maximum of all maximums -template -static constexpr auto _analyze(select, R captures) noexcept { - analysis_results ret = trampoline_select_analysis(ctll::list(), captures); - return ret; -} - //character, any character contributes exactly one to both counts template -static constexpr auto _analyze(character, R captures) noexcept { +static constexpr analysis_results _analyze(character, R captures) noexcept { analysis_results ret{ std::make_pair(1ULL, 1ULL) }; return ret; } //strings, any string contributes the # of characters it contains (if we have an empty string that'll be 0) template -static constexpr auto _analyze(string, R captures) noexcept { +static constexpr analysis_results _analyze(string, R captures) noexcept { analysis_results ret{ std::make_pair(sizeof...(Str), sizeof...(Str)) }; return ret; } @@ -628,53 +633,60 @@ static constexpr auto _analyze(string, R captures) noexcept { //we'll process anything that has contents as a regex //ctll::list template -static constexpr auto _analyze(ctll::list,R captures) noexcept { - analysis_results ret = trampoline_analysis(ctll::list(), captures); +static constexpr analysis_results _analyze(ctll::list, R captures) noexcept { + analysis_results ret{ trampoline_analysis(ctll::list(), captures) }; return ret; } //sequence template -static constexpr auto _analyze(sequence, R captures) noexcept { - analysis_results ret = trampoline_analysis(ctll::list(), captures); +static constexpr analysis_results _analyze(sequence, R captures) noexcept { + analysis_results ret{ trampoline_analysis(ctll::list(), captures) }; return ret; } //capture template -static constexpr auto _analyze(capture, R captures) noexcept { - analysis_results ret = trampoline_analysis(ctll::list(), captures); +static constexpr analysis_results _analyze(capture, R captures) noexcept { + analysis_results ret{ trampoline_analysis(ctll::list(), captures) }; return ret; } //capture_with_name template -static constexpr auto _analyze(capture_with_name, R captures) noexcept { - analysis_results ret = trampoline_analysis(ctll::list(), captures); +static constexpr analysis_results _analyze(capture_with_name, R captures) noexcept { + analysis_results ret{ trampoline_analysis(ctll::list(), captures) }; return ret; } //everything else, anything we haven't matched already isn't supported and will contribute 0 template -static constexpr auto _analyze(T, R captures) noexcept { +static constexpr analysis_results _analyze(T, R captures) noexcept { analysis_results ret{ std::make_pair(0ULL, 0ULL) }; return ret; } + +//select, this is specialized, we need to take the minimum of all minimums and maximum of all maximums +template +static constexpr analysis_results _analyze(select, R captures) noexcept { + analysis_results ret = ((trampoline_analysis(Content(), captures)) || ...); + return ret; +} + //note: ctll::list wraps patterns just like sequences, we'll treat anything that looks like a regex w/ ctll::list template -static constexpr auto trampoline_analysis(ctll::list, R captures) noexcept { +static constexpr analysis_results trampoline_analysis(ctll::list, R captures) noexcept { //fold, for every argument in a ctll::list, calculate its contribution to the limits - auto r = ((_analyze(Patterns(), captures)) + ...); + analysis_results ret = ((_analyze(Patterns(), captures)) + ...); //note any reordering of parameters will result in the same limits - return r; + return ret; } -template -static constexpr auto trampoline_select_analysis(ctll::list, R captures) noexcept { - //fold, each argument in a selection of regexes we take the minimum and maximum of all values - auto r = ((trampoline_analysis(Patterns(), captures)) || ...); - //note again, order is unimportant - return r; +template +static constexpr analysis_results trampoline_analysis(Pattern pattern, R captures) noexcept { + //some individual type, we can immediately analyze it + analysis_results ret = _analyze(pattern, captures); + return ret; } template @@ -684,12 +696,11 @@ static constexpr auto pattern_analysis(ctll::list) noexcept { } template -static constexpr auto pattern_analysis(Pattern pattern = {}) noexcept { +static constexpr auto pattern_analysis(Pattern pattern) noexcept { using return_type = decltype(regex_results(std::declval::iterator>(), find_captures(pattern))); return trampoline_analysis(ctll::list(), return_type{}); } - } #endif From 94b3a932bff9d5deecb210261d3da843b688ca22 Mon Sep 17 00:00:00 2001 From: Andersama Date: Sun, 29 Dec 2019 18:12:41 -0800 Subject: [PATCH 3/3] Handle characterlike things as opposed to just characters --- include/ctre/evaluation.hpp | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/include/ctre/evaluation.hpp b/include/ctre/evaluation.hpp index a0039063..80199356 100644 --- a/include/ctre/evaluation.hpp +++ b/include/ctre/evaluation.hpp @@ -510,17 +510,13 @@ struct analysis_results : std::pair { } }; -//processing for each type - -template -static constexpr analysis_results _analyze(T, R captures) noexcept; - template static constexpr auto pattern_analysis(ctll::list) noexcept; template static constexpr auto pattern_analysis(Pattern pattern = {}) noexcept; +//processing for each type //repeat template static constexpr analysis_results _analyze(repeat, R captures) noexcept { @@ -616,9 +612,9 @@ static constexpr analysis_results _analyze(back_reference_with_name, R cap return ret; } -//character, any character contributes exactly one to both counts -template -static constexpr analysis_results _analyze(character, R captures) noexcept { +//CharacterLike, anything that's like a character contributes 1 to both counts +template ::template value::iterator>())>)>> + static constexpr analysis_results _analyze(CharacterLike, R captures) { analysis_results ret{ std::make_pair(1ULL, 1ULL) }; return ret; } @@ -660,7 +656,7 @@ static constexpr analysis_results _analyze(capture_with_name +template::template value::iterator>())>)>> static constexpr analysis_results _analyze(T, R captures) noexcept { analysis_results ret{ std::make_pair(0ULL, 0ULL) }; return ret;