Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
73b8241
[ES Temperature] Support informal degrees and fix issue in ES auto-ge…
tellarin Mar 3, 2022
365d66f
[Python] Enabling SpanishMexican culture in Number/Unit/DateTimveV2 (…
colm-dillon Mar 3, 2022
ff11e07
[Python] Add Portuguese support for DateTimeV2 (#2876)
samhickey25 Mar 3, 2022
3e520e5
[KO Number] Fixed incorrect extraction of number from datetime mentio…
aitelint Mar 11, 2022
f703254
[.NET Timexlib] Fix timex-to-string conversion of English ordinals 11…
shana Mar 12, 2022
45b0c3b
[EN .NET] Workaround for TimexProperty.ToString() to not crash on Dat…
shana Mar 14, 2022
d6e3692
Merge pull request #24 from microsoft/master
Mar 30, 2022
30dbdaf
Merge pull request #29 from microsoft/master
Apr 28, 2022
3a2342a
Merge pull request #33 from microsoft/master
samhickey25 May 11, 2022
cd9ba48
Merge pull request #37 from microsoft/master
kevinwalshgen Jun 9, 2022
22f56fd
NLU-2966: Fix failing tests
kevinwalshgen Jun 10, 2022
c544eff
Revert "NLU-2966: Fix failing tests"
kevinwalshgen Jun 10, 2022
8f92c9a
Merge pull request #42 from microsoft/master
samhickey25 Jul 15, 2022
f14677d
Merge branch 'microsoft:master' into master
samhickey25 Aug 8, 2022
d631cbe
Merge branch 'microsoft:master' into master
rbrennangen Aug 15, 2022
a830707
Merge branch 'microsoft:master' into master
rbrennangen Aug 17, 2022
e378aa3
Merge branch 'microsoft:master' into master
rbrennangen Aug 31, 2022
86f14b8
Merge branch 'microsoft:master' into master
Conor-Keaney Nov 10, 2022
4451a53
Merge pull request #53 from microsoft/master
samhickey25 Dec 19, 2022
bde6680
Merge branch 'microsoft:master' into master
Conor-Keaney Jan 30, 2023
b03280c
Merge branch 'microsoft:master' into master
Conor-Keaney Feb 8, 2023
17de6bb
Merge pull request #71 from microsoft/master
samhickey25 Apr 19, 2023
424337a
Add Japanese number with unit support for Python
samhickey25 Apr 19, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .extractors import *
from .parsers import *
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
from typing import Dict, List, Pattern

from recognizers_text.culture import Culture
from recognizers_text.extractor import Extractor
from recognizers_text.utilities import RegExpUtility
from recognizers_number.culture import CultureInfo
from recognizers_number.number.japanese.extractors import JapaneseNumberExtractor, JapaneseNumberExtractorMode
from recognizers_number_with_unit.number_with_unit.constants import Constants
from recognizers_number_with_unit.number_with_unit.extractors import NumberWithUnitExtractorConfiguration
from recognizers_number_with_unit.resources.japanese_numeric_with_unit import JapaneseNumericWithUnit
from recognizers_number_with_unit.resources.base_units import BaseUnits


# pylint: disable=abstract-method
class JapaneseNumberWithUnitExtractorConfiguration(NumberWithUnitExtractorConfiguration):
    """Common extractor configuration for Japanese number-with-unit recognition.

    Stores the Japanese number extractor, the prefix/suffix/connector strings
    taken from ``JapaneseNumericWithUnit`` and the compiled regexes, and
    exposes each of them through a read-only property.
    """

    def __init__(self, culture_info: CultureInfo):
        """Initialise the shared Japanese extractor settings.

        :param culture_info: Culture to configure the extractor with. When it
            is ``None`` a ``CultureInfo`` for ``Culture.Japanese`` is created.
        """
        if culture_info is None:
            culture_info = CultureInfo(Culture.Japanese)
        super().__init__(culture_info)

        # Short local aliases for the resource class and the regex compiler.
        resources = JapaneseNumericWithUnit
        compile_regex = RegExpUtility.get_safe_reg_exp

        self._unit_num_extractor = JapaneseNumberExtractor(
            JapaneseNumberExtractorMode.EXTRACT_ALL)
        self._build_prefix = resources.BuildPrefix
        self._build_suffix = resources.BuildSuffix
        self._connector_token = resources.ConnectorToken
        self._compound_unit_connector_regex = compile_regex(
            resources.CompoundUnitConnectorRegex)
        # The non-unit pattern comes from the shared BaseUnits resources,
        # not from the Japanese-specific ones.
        self._pm_non_unit_regex = compile_regex(BaseUnits.PmNonUnitRegex)
        self._half_unit_regex = compile_regex(resources.HalfUnitRegex)

    @property
    def ambiguity_filters_dict(self) -> Dict[Pattern, Pattern]:
        """Ambiguity filters defined in ``JapaneseNumericWithUnit``."""
        return JapaneseNumericWithUnit.AmbiguityFiltersDict

    @property
    def unit_num_extractor(self) -> Extractor:
        """Japanese number extractor created in ``EXTRACT_ALL`` mode."""
        return self._unit_num_extractor

    @property
    def build_prefix(self) -> str:
        """``BuildPrefix`` value from the Japanese resources."""
        return self._build_prefix

    @property
    def build_suffix(self) -> str:
        """``BuildSuffix`` value from the Japanese resources."""
        return self._build_suffix

    @property
    def connector_token(self) -> str:
        """``ConnectorToken`` value from the Japanese resources."""
        return self._connector_token

    @property
    def compound_unit_connector_regex(self) -> Pattern:
        """Compiled ``CompoundUnitConnectorRegex`` from the Japanese resources."""
        return self._compound_unit_connector_regex

    @property
    def non_unit_regex(self) -> Pattern:
        """Compiled ``BaseUnits.PmNonUnitRegex``."""
        return self._pm_non_unit_regex

    @property
    def half_unit_regex(self) -> Pattern:
        """Compiled ``HalfUnitRegex`` from the Japanese resources."""
        return self._half_unit_regex

    @property
    def ambiguous_unit_number_multiplier_regex(self) -> Pattern:
        """Always ``None``; this configuration provides no such regex."""
        return None

    def expand_half_suffix(self, source, result, numbers):
        """Do nothing; this configuration performs no half-suffix expansion."""


# pylint: enable=abstract-method

class JapaneseCurrencyExtractorConfiguration(JapaneseNumberWithUnitExtractorConfiguration):
    """Extractor configuration for currency units in Japanese text.

    Adds the currency suffix, prefix and ambiguous-unit lists from
    ``JapaneseNumericWithUnit`` on top of the shared Japanese settings.
    """

    def __init__(self, culture_info: CultureInfo = None):
        """Load the Japanese currency unit lists.

        :param culture_info: Optional culture, forwarded unchanged to the
            parent constructor.
        """
        super().__init__(culture_info)
        resources = JapaneseNumericWithUnit
        self._suffix_list = resources.CurrencySuffixList
        self._prefix_list = resources.CurrencyPrefixList
        self._ambiguous_unit_list = resources.CurrencyAmbiguousValues

    @property
    def extract_type(self) -> str:
        """The ``Constants.SYS_UNIT_CURRENCY`` type identifier."""
        return Constants.SYS_UNIT_CURRENCY

    @property
    def suffix_list(self) -> Dict[str, str]:
        """``CurrencySuffixList`` from the Japanese resources."""
        return self._suffix_list

    @property
    def prefix_list(self) -> Dict[str, str]:
        """``CurrencyPrefixList`` from the Japanese resources."""
        return self._prefix_list

    @property
    def ambiguous_unit_list(self) -> List[str]:
        """``CurrencyAmbiguousValues`` from the Japanese resources."""
        return self._ambiguous_unit_list

Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

from recognizers_text.culture import Culture
from recognizers_text.extractor import Extractor
from recognizers_text.parser import Parser
from recognizers_number.culture import CultureInfo
from recognizers_number.number.japanese.extractors import JapaneseNumberExtractor, JapaneseNumberExtractorMode
from recognizers_number.number.parser_factory import AgnosticNumberParserFactory, ParserType
from recognizers_number.number.japanese.parsers import JapaneseNumberParserConfiguration
from recognizers_number_with_unit.number_with_unit.parsers import NumberWithUnitParserConfiguration
from recognizers_number_with_unit.resources.japanese_numeric_with_unit import JapaneseNumericWithUnit


class JapaneseNumberWithUnitParserConfiguration(NumberWithUnitParserConfiguration):
    """Common parser configuration for Japanese number-with-unit recognition.

    Keeps the Japanese number extractor, the number parser built by
    ``AgnosticNumberParserFactory`` and the connector token, each exposed
    through a read-only property.
    """

    def __init__(self, culture_info: CultureInfo):
        """Create the internal number extractor and parser.

        :param culture_info: Culture to configure the parser with. When it is
            ``None`` a ``CultureInfo`` for ``Culture.Japanese`` is created.
        """
        if culture_info is None:
            culture_info = CultureInfo(Culture.Japanese)
        super().__init__(culture_info)

        self._internal_number_extractor = JapaneseNumberExtractor(
            JapaneseNumberExtractorMode.EXTRACT_ALL)

        # The number parser is configured with the same culture as this object.
        number_parser_config = JapaneseNumberParserConfiguration(culture_info)
        self._internal_number_parser = AgnosticNumberParserFactory.get_parser(
            ParserType.NUMBER, number_parser_config)

        self._connector_token = JapaneseNumericWithUnit.ConnectorToken

    @property
    def internal_number_parser(self) -> Parser:
        """Number parser obtained from ``AgnosticNumberParserFactory``."""
        return self._internal_number_parser

    @property
    def internal_number_extractor(self) -> Extractor:
        """Japanese number extractor created in ``EXTRACT_ALL`` mode."""
        return self._internal_number_extractor

    @property
    def connector_token(self) -> str:
        """``ConnectorToken`` value from the Japanese resources."""
        return self._connector_token


class JapaneseCurrencyParserConfiguration(JapaneseNumberWithUnitParserConfiguration):
    """Parser configuration for currency units in Japanese text."""

    def __init__(self, culture_info: CultureInfo = None):
        """Register the Japanese currency units and currency code maps.

        :param culture_info: Optional culture, forwarded unchanged to the
            parent constructor.
        """
        super().__init__(culture_info)
        resources = JapaneseNumericWithUnit

        # Suffix units are registered before prefix units, as in the
        # original statement order.
        for unit_dict in (resources.CurrencySuffixList, resources.CurrencyPrefixList):
            self.add_dict_to_unit_map(unit_dict)

        self.currency_name_to_iso_code_map = resources.CurrencyNameToIsoCodeMap
        self.currency_fraction_code_list = resources.FractionalUnitNameToCodeMap

Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def parse(self, query: str) -> List[ModelResult]:
parse_results.append(j)
else:
parse_results.append(r)

model_result = None
for parse_result in parse_results:
model_result = ModelResult()
model_result.start = parse_result.start
Expand All @@ -57,6 +57,8 @@ def parse(self, query: str) -> List[ModelResult]:

if b_add:
extraction_results.append(model_result)
if model_result:
break
except Exception:
pass

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@
from .german.parsers import GermanCurrencyParserConfiguration
from .italian.extractors import ItalianCurrencyExtractorConfiguration
from .italian.parsers import ItalianCurrencyParserConfiguration
from .japanese.extractors import JapaneseCurrencyExtractorConfiguration
from .japanese.parsers import JapaneseCurrencyParserConfiguration


class NumberWithUnitOptions(IntFlag):
Expand Down Expand Up @@ -264,6 +266,19 @@ def initialize_configuration(self):
]))
# endregion

# region Japanese
self.register_model('CurrencyModel', Culture.Japanese, lambda options: CurrencyModel([
ExtractorParserModel(
BaseMergedUnitExtractor(
JapaneseCurrencyExtractorConfiguration()),
BaseMergedUnitParser(JapaneseCurrencyParserConfiguration())),
ExtractorParserModel(
NumberWithUnitExtractor(
EnglishCurrencyExtractorConfiguration()),
NumberWithUnitParser(EnglishCurrencyParserConfiguration()))
]))
# endregion

def get_age_model(self, culture: str = None, fallback_to_default_culture: bool = True) -> Model:
return self.get_model('AgeModel', culture, fallback_to_default_culture)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@
from .dutch_numeric_with_unit import DutchNumericWithUnit
from .portuguese_numeric_with_unit import PortugueseNumericWithUnit
from .spanish_numeric_with_unit import SpanishNumericWithUnit
from .japanese_numeric_with_unit import JapaneseNumericWithUnit
Loading