Skip to content
This repository was archived by the owner on Dec 4, 2023. It is now read-only.

[SDK][Recognizers-Text] Update temporal folder with latest changes #1164

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,25 @@ private boolean validateMatch(Match match, String text) {
isValidMatch = startsWithBasicDate(subText);
}
}

// Expressions with mixed separators are not considered valid dates e.g. "30/4.85" (unless one is a comma "30/4, 2016")
MatchGroup dayGroup = match.getGroup("day");
MatchGroup monthGroup = match.getGroup("month");
if (!StringUtility.isNullOrEmpty(dayGroup.value) && !StringUtility.isNullOrEmpty(monthGroup.value)) {
String noDateText = match.value.replace(yearGroup.value, "")
.replace(monthGroup.value, "").replace(dayGroup.value, "");
String[] separators = {"/", "\\", "-", "."};
int separatorCount = 0;
for (String separator : separators) {
if (noDateText.contains(separator)) {
separatorCount++;
}
if (separatorCount > 1) {
isValidMatch = false;
break;
}
}
}
}

return isValidMatch;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,27 @@ public List<Token> timeOfTodayAfter(String input, LocalDateTime reference) {

Match[] matches = RegExpUtility.getMatches(this.config.getSimpleTimeOfTodayAfterRegex(), input);
for (Match match : matches) {
// @TODO Remove when lookbehinds are handled correctly
if (isDecimal(match, input)) {
continue;
}

ret.add(new Token(match.index, match.index + match.length));
}

return ret;
}

/**
 * Checks whether the match is the fractional part of a decimal number (e.g. the "24" in "123.24").
 *
 * <p>The match is treated as a decimal fragment when the character immediately before it is
 * ',' or '.', the character before that separator is a digit, and the match itself begins
 * with a digit. Matches starting at index 0 or 1 never qualify, because there is no room
 * for both a digit and a separator in front of them.
 *
 * @param match the regex match to inspect
 * @param text  the full input text the match was found in
 * @return {@code true} if the match appears to continue a decimal number, {@code false} otherwise
 */
private boolean isDecimal(Match match, String text) {
    // A leading digit plus a separator needs at least two characters before the match.
    if (match.index <= 1) {
        return false;
    }

    char separator = text.charAt(match.index - 1);
    if (separator != ',' && separator != '.') {
        return false;
    }

    char beforeSeparator = text.charAt(match.index - 2);
    return Character.isDigit(beforeSeparator) && Character.isDigit(match.value.charAt(0));
}

public List<Token> timeOfTodayBefore(String input, LocalDateTime reference) {
List<Token> ret = new ArrayList<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,11 @@ public final List<Token> basicRegexMatch(String text) {

Match[] matches = RegExpUtility.getMatches(regex, text);
for (Match match : matches) {

// @TODO Remove when lookbehinds are handled correctly
if (isDecimal(match, text)) {
continue;
}

// @TODO Workaround to avoid incorrect partial-only matches. Remove after time regex reviews across languages.
String lth = match.getGroup("lth").value;
Expand All @@ -102,6 +107,17 @@ public final List<Token> basicRegexMatch(String text) {

return ret;
}

/**
 * Tells whether the given match sits directly after a decimal separator inside a number
 * (e.g. the "24" in "123.24"), in which case it should not be treated as a time.
 *
 * <p>All of the following must hold: the match starts at index 2 or later, the preceding
 * character is ',' or '.', the character before that one is a digit, and the first
 * character of the match is a digit.
 *
 * @param match the regex match to inspect
 * @param text  the full input text the match was found in
 * @return {@code true} when the match looks like the tail of a decimal number
 */
private boolean isDecimal(Match match, String text) {
    final int start = match.index;
    return start > 1
        && (text.charAt(start - 1) == ',' || text.charAt(start - 1) == '.')
        && Character.isDigit(text.charAt(start - 2))
        && Character.isDigit(match.value.charAt(0));
}

private List<Token> atRegexMatch(String text) {
List<Token> ret = new ArrayList<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

public class BaseDateTime {

public static final String HourRegex = "(?<hour>2[0-4]|[0-1]?\\d)(h)?";
public static final String HourRegex = "(?<!\\d[,.])(?<hour>2[0-4]|[0-1]?\\d)(h)?";

public static final String TwoDigitHourRegex = "(?<hour>[0-1]\\d|2[0-4])(h)?";

Expand All @@ -36,6 +36,8 @@ public class BaseDateTime {
public static final String IllegalYearRegex = "([-])({FourDigitYearRegex})([-])"
.replace("{FourDigitYearRegex}", FourDigitYearRegex);

public static final String CheckDecimalRegex = "(?![,.]\\d)";

public static final String RangeConnectorSymbolRegex = "(--|-|—|——|~|–)";

public static final String BaseAmDescRegex = "(am\\b|a\\s*\\.\\s*m\\s*\\.|a[\\.]?\\s*m\\b)";
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ public class EnglishTimeZone {
.put("esat", -180)
.put("est", -300)
.put("estm", -300)
.put("et", -240)
.put("et", -300)
.put("fjst", 780)
.put("fjt", 720)
.put("get", 240)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ public class FrenchDateTime {

public static final String RangeConnectorRegex = "(?<and>de la|au|[aà]|et(\\s*la)?|--|-|—|——)";

public static final String RelativeRegex = "(?<order>prochaine?|de|du|ce(tte)?|l[ae]|derni[eè]re|pr[eé]c[eé]dente|au\\s+cours+(de|du\\s*))";
public static final String RelativeRegex = "(?<order>prochaine?|de|du|ce(tte)?|l[ae]|derni[eè]re|hier|pr[eé]c[eé]dente|au\\s+cours+(de|du\\s*))";

public static final String StrictRelativeRegex = "(?<order>prochaine?|derni[eè]re|pr[eé]c[eé]dente|au\\s+cours+(de|du\\s*))";
public static final String StrictRelativeRegex = "(?<order>prochaine?|derni[eè]re|hier|pr[eé]c[eé]dente|au\\s+cours+(de|du\\s*))";

public static final String NextSuffixRegex = "(?<order>prochaines?|prochain|suivante)\\b";

Expand All @@ -39,9 +39,9 @@ public class FrenchDateTime {

public static final String RangePrefixRegex = "(du|depuis|des?|entre)";

public static final String DayRegex = "(?<day>01|02|03|04|05|06|07|08|09|10|11e?|12e?|13e?|14e?|15e?|16e?|17e?|18e?|19e?|1er|1|21e?|20e?|22e?|23e?|24e?|25e?|26e?|27e?|28e?|29e?|2e?|30e?|31e?|3e?|4e?|5e?|6e?|7e?|8e?|9e?)(?=\\b|t)";
public static final String DayRegex = "(?<day>(?:3[0-1]|[1-2]\\d|0?[1-9])(e(r)?)?)(?=\\b|t)";

public static final String MonthNumRegex = "(?<month>01|02|03|04|05|06|07|08|09|10|11|12|1|2|3|4|5|6|7|8|9)\\b";
public static final String MonthNumRegex = "(?<month>1[0-2]|(0)?[1-9])\\b";

public static final String SpecialDescRegex = "(p\\b)";

Expand All @@ -60,7 +60,7 @@ public class FrenchDateTime {
.replace("{AmPmDescRegex}", AmPmDescRegex)
.replace("{SpecialDescRegex}", SpecialDescRegex);

public static final String TwoDigitYearRegex = "\\b(?<![$])(?<year>([0-24-9]\\d))(?!(\\s*((\\:\\d)|{AmDescRegex}|{PmDescRegex}|\\.\\d)))\\b"
public static final String TwoDigitYearRegex = "\\b(?<![$])(?<year>([0-9]\\d))(?!(\\s*((\\:\\d)|{AmDescRegex}|{PmDescRegex}|\\.\\d)))\\b"
.replace("{AmDescRegex}", AmDescRegex)
.replace("{PmDescRegex}", PmDescRegex);

Expand Down Expand Up @@ -212,23 +212,25 @@ public class FrenchDateTime {
.replace("{YearRegex}", YearRegex)
.replace("{TwoDigitYearRegex}", TwoDigitYearRegex);

public static final String DateExtractor1 = "\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?{MonthRegex}\\s*[/\\\\\\.\\-]?\\s*{DayRegex}\\b"
public static final String DateExtractor1 = "\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?{MonthRegex}\\s*[/\\\\\\.\\-]?\\s*{DayRegex}(\\s*[/\\\\\\.\\-]?\\s*{BaseDateTime.FourDigitYearRegex})?\\b"
.replace("{WeekDayRegex}", WeekDayRegex)
.replace("{MonthRegex}", MonthRegex)
.replace("{DayRegex}", DayRegex);
.replace("{DayRegex}", DayRegex)
.replace("{BaseDateTime.FourDigitYearRegex}", BaseDateTime.FourDigitYearRegex);

public static final String DateExtractor2 = "\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?{DayRegex}(\\s+|\\s*,\\s*|\\s+){MonthRegex}\\s*[\\.\\-]?\\s*{DateYearRegex}\\b"
.replace("{WeekDayRegex}", WeekDayRegex)
.replace("{MonthRegex}", MonthRegex)
.replace("{DayRegex}", DayRegex)
.replace("{DateYearRegex}", DateYearRegex);

public static final String DateExtractor3 = "\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?(?<!\\d\\s)(?<!\\d){DayRegex}(\\s+|\\s*,\\s*|\\s*-\\s*)({MonthRegex}((\\s+|\\s*,\\s*){DateYearRegex}(?!\\s*\\d))?|{MonthNumRegex}(\\s+|\\s*,\\s*){DateYearRegex}(?!\\s*\\d))\\b"
public static final String DateExtractor3 = "\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?((?<!\\d\\s)(?<!\\d){DayRegex}(\\s+|\\s*[.,/-])({MonthRegex}((\\s+|\\s*[.,/-]\\s*){DateYearRegex}(?!\\s*\\d))?|{MonthNumRegex}(\\s+|\\s*[.,/-]\\s*){DateYearRegex}(?!\\s*\\d))|{BaseDateTime.FourDigitYearRegex}\\s*[.,/-]?\\s*{DayRegex}\\s*[.,/-]?\\s*{MonthRegex})\\b"
.replace("{WeekDayRegex}", WeekDayRegex)
.replace("{DayRegex}", DayRegex)
.replace("{MonthRegex}", MonthRegex)
.replace("{MonthNumRegex}", MonthNumRegex)
.replace("{DateYearRegex}", DateYearRegex);
.replace("{DateYearRegex}", DateYearRegex)
.replace("{BaseDateTime.FourDigitYearRegex}", BaseDateTime.FourDigitYearRegex);

public static final String DateExtractor4 = "\\b{MonthNumRegex}\\s*[/\\\\\\-]\\s*{DayRegex}\\s*[/\\\\\\-]\\s*{DateYearRegex}(?!\\s*[/\\\\\\-\\.]\\s*\\d+)"
.replace("{MonthNumRegex}", MonthNumRegex)
Expand All @@ -241,28 +243,34 @@ public class FrenchDateTime {
.replace("{MonthNumRegex}", MonthNumRegex)
.replace("{DateYearRegex}", DateYearRegex);

public static final String DateExtractor6 = "(?<=\\b(le|sur(\\sl[ae])?)\\s+){MonthNumRegex}[\\-\\.\\/]{DayRegex}\\b"
public static final String DateExtractor6 = "(?<=\\b(le|sur(\\sl[ae])?)\\s+){MonthNumRegex}[\\-\\.\\/]{DayRegex}{BaseDateTime.CheckDecimalRegex}\\b"
.replace("{MonthNumRegex}", MonthNumRegex)
.replace("{DayRegex}", DayRegex);
.replace("{DayRegex}", DayRegex)
.replace("{BaseDateTime.CheckDecimalRegex}", BaseDateTime.CheckDecimalRegex);

public static final String DateExtractor7 = "\\b{DayRegex}\\s*/\\s*{MonthNumRegex}((\\s+|\\s*,\\s*){DateYearRegex})?\\b"
public static final String DateExtractor7 = "\\b{DayRegex}\\s*/\\s*{MonthNumRegex}((\\s+|\\s*,\\s*){DateYearRegex})?{BaseDateTime.CheckDecimalRegex}\\b"
.replace("{DayRegex}", DayRegex)
.replace("{MonthNumRegex}", MonthNumRegex)
.replace("{DateYearRegex}", DateYearRegex);
.replace("{DateYearRegex}", DateYearRegex)
.replace("{BaseDateTime.CheckDecimalRegex}", BaseDateTime.CheckDecimalRegex);

public static final String DateExtractor8 = "(?<=\\b(le)\\s+){DayRegex}[\\\\\\-]{MonthNumRegex}\\b"
public static final String DateExtractor8 = "(?<=\\b(le)\\s+){DayRegex}[\\\\\\-]{MonthNumRegex}{BaseDateTime.CheckDecimalRegex}\\b"
.replace("{DayRegex}", DayRegex)
.replace("{MonthNumRegex}", MonthNumRegex);
.replace("{MonthNumRegex}", MonthNumRegex)
.replace("{BaseDateTime.CheckDecimalRegex}", BaseDateTime.CheckDecimalRegex);

public static final String DateExtractor9 = "\\b{DayRegex}\\s*/\\s*{MonthNumRegex}((\\s+|\\s*,\\s*){DateYearRegex})?\\b"
public static final String DateExtractor9 = "\\b{DayRegex}\\s*/\\s*{MonthNumRegex}((\\s+|\\s*,\\s*){DateYearRegex})?{BaseDateTime.CheckDecimalRegex}\\b"
.replace("{DayRegex}", DayRegex)
.replace("{MonthNumRegex}", MonthNumRegex)
.replace("{DateYearRegex}", DateYearRegex);
.replace("{DateYearRegex}", DateYearRegex)
.replace("{BaseDateTime.CheckDecimalRegex}", BaseDateTime.CheckDecimalRegex);

public static final String DateExtractorA = "\\b{DateYearRegex}\\s*[/\\\\\\-\\.]\\s*{MonthNumRegex}\\s*[/\\\\\\-\\.]\\s*{DayRegex}(?!\\s*[/\\\\\\-\\.]\\s*\\d+)"
public static final String DateExtractorA = "\\b({DateYearRegex}\\s*[/\\\\\\-\\.]\\s*({MonthNumRegex}|{MonthRegex})\\s*[/\\\\\\-\\.]\\s*{DayRegex}|{MonthRegex}\\s*[/\\\\\\-\\.]\\s*{BaseDateTime.FourDigitYearRegex}\\s*[/\\\\\\-\\.]\\s*{DayRegex}|{DayRegex}\\s*[/\\\\\\-\\.]\\s*{BaseDateTime.FourDigitYearRegex}\\s*[/\\\\\\-\\.]\\s*{MonthRegex})(?!\\s*[/\\\\\\-\\.:]\\s*\\d+)"
.replace("{DateYearRegex}", DateYearRegex)
.replace("{MonthNumRegex}", MonthNumRegex)
.replace("{DayRegex}", DayRegex);
.replace("{MonthRegex}", MonthRegex)
.replace("{DayRegex}", DayRegex)
.replace("{BaseDateTime.FourDigitYearRegex}", BaseDateTime.FourDigitYearRegex);

public static final String OfMonth = "^\\s*de\\s*{MonthRegex}"
.replace("{MonthRegex}", MonthRegex);
Expand Down Expand Up @@ -305,7 +313,7 @@ public class FrenchDateTime {
.replace("{PmRegex}", PmRegex)
.replace("{OclockRegex}", OclockRegex);

public static final String BasicTime = "(?<basictime>{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?|{BaseDateTime.HourRegex})"
public static final String BasicTime = "(?<basictime>{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}(:|\\s*h\\s*){BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?|{BaseDateTime.HourRegex})"
.replace("{WrittenTimeRegex}", WrittenTimeRegex)
.replace("{HourNumRegex}", HourNumRegex)
.replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex)
Expand Down Expand Up @@ -348,7 +356,7 @@ public class FrenchDateTime {

public static final String RestrictedTimeUnitRegex = "(?<unit>huere|minute)\\b";

public static final String ConnectNumRegex = "{BaseDateTime.HourRegex}(?<min>00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59)\\s*{DescRegex}"
public static final String ConnectNumRegex = "{BaseDateTime.HourRegex}(?<min>[0-5][0-9])\\s*{DescRegex}"
.replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex)
.replace("{DescRegex}", DescRegex);

Expand Down Expand Up @@ -446,7 +454,7 @@ public class FrenchDateTime {

public static final String TimeOfDayRegex = "\\b(?<timeOfDay>((((dans\\s+(l[ea])?\\s+)?((?<early>d[eé]but(\\s+|-)|t[oô]t(\\s+|-)(l[ea]\\s*)?)|(?<late>fin\\s*|fin de(\\s+(la)?)|tard\\s*))?(matin([ée]e)?|((d|l)?'?)apr[eè]s[-|\\s*]midi|nuit|soir([eé]e)?)))|(((\\s+(l[ea])?\\s+)?)jour(n[eé]e)?))s?)\\b";

public static final String SpecificTimeOfDayRegex = "\\b(({RelativeRegex}\\s+{TimeOfDayRegex})|({TimeOfDayRegex}\\s*({NextSuffixRegex}))\\b|\\bsoir|\\bdu soir)s?\\b"
public static final String SpecificTimeOfDayRegex = "\\b(({RelativeRegex}\\s+{TimeOfDayRegex})|({TimeOfDayRegex}\\s*({NextSuffixRegex}))\\b|\\b(du )?soir)s?\\b"
.replace("{TimeOfDayRegex}", TimeOfDayRegex)
.replace("{RelativeRegex}", RelativeRegex)
.replace("{NextSuffixRegex}", NextSuffixRegex);
Expand All @@ -470,15 +478,15 @@ public class FrenchDateTime {
public static final String TimeOfTodayAfterRegex = "^\\s*(,\\s*)?(en|dans|du\\s+)?{DateTimeSpecificTimeOfDayRegex}"
.replace("{DateTimeSpecificTimeOfDayRegex}", DateTimeSpecificTimeOfDayRegex);

public static final String TimeOfTodayBeforeRegex = "{DateTimeSpecificTimeOfDayRegex}(\\s*,)?(\\s+([àa]|pour))?\\s*$"
public static final String TimeOfTodayBeforeRegex = "{DateTimeSpecificTimeOfDayRegex}(\\s*,)?(\\s+([àa]|vers|pour))?\\s*$"
.replace("{DateTimeSpecificTimeOfDayRegex}", DateTimeSpecificTimeOfDayRegex);

public static final String SimpleTimeOfTodayAfterRegex = "({HourNumRegex}|{BaseDateTime.HourRegex})\\s*(,\\s*)?(en|[àa]\\s+)?{DateTimeSpecificTimeOfDayRegex}"
.replace("{HourNumRegex}", HourNumRegex)
.replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex)
.replace("{DateTimeSpecificTimeOfDayRegex}", DateTimeSpecificTimeOfDayRegex);

public static final String SimpleTimeOfTodayBeforeRegex = "{DateTimeSpecificTimeOfDayRegex}(\\s*,)?(\\s+([àa]|vers))?\\s*({HourNumRegex}|{BaseDateTime.HourRegex})"
public static final String SimpleTimeOfTodayBeforeRegex = "{DateTimeSpecificTimeOfDayRegex}(\\s*,)?(\\s+([àa]|vers|pour))?\\s*({HourNumRegex}|{BaseDateTime.HourRegex})"
.replace("{DateTimeSpecificTimeOfDayRegex}", DateTimeSpecificTimeOfDayRegex)
.replace("{HourNumRegex}", HourNumRegex)
.replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex);
Expand Down Expand Up @@ -581,7 +589,7 @@ public class FrenchDateTime {

public static final String SinceRegex = "\\b(depuis)\\b";

public static final String AroundRegex = "^[.]";
public static final String AroundRegex = "\\b(vers)\\b";

public static final String AgoPrefixRegex = "\\b(y a)\\b";

Expand Down Expand Up @@ -640,7 +648,7 @@ public class FrenchDateTime {
public static final String RelativeDayRegex = "\\b(((la\\s+)?{RelativeRegex}\\s+journ[ée]e))\\b"
.replace("{RelativeRegex}", RelativeRegex);

public static final String ConnectorRegex = "^(,|pour|t|vers)$";
public static final String ConnectorRegex = "^(,|pour|t|vers|le)$";

public static final String ConnectorAndRegex = "\\b(et\\s*(le|las?)?)\\b.+";

Expand Down Expand Up @@ -1188,6 +1196,7 @@ public class FrenchDateTime {
public static final ImmutableMap<String, String> AmbiguityFiltersDict = ImmutableMap.<String, String>builder()
.put("^([eé]t[eé])$", "(?<!((l\\s*['`]\\s*)|(cet(te)?|en)\\s+))[eé]t[eé]\\b")
.put("^(mer)$", "(?<!((le|ce)\\s+))mer\\b")
.put("^(avr|ao[uû]t|d[eé]c|f[eé]vr?|janv?|jui?[ln]|mars?|mai|nov|oct|sept?)$", "([$%£&!?@#])(avr|ao[uû]t|d[eé]c|f[eé]vr?|janv?|jui?[ln]|mars?|mai|nov|oct|sept?)|(avr|ao[uû]t|d[eé]c|f[eé]vr?|janv?|jui?[ln]|mars?|mai|nov|oct|sept?)([$%£&@#])")
.build();

public static final ImmutableMap<String, String> AmbiguityTimeFiltersDict = ImmutableMap.<String, String>builder()
Expand Down
Loading