Skip to content

Commit e4c6f6b

Browse files
authored
Merge pull request #1228 from PyThaiNLP/copilot/revert-pep604-union-types
Revert PEP 604 union syntax to Union[]/Optional[] for Python 3.9 compatibility
2 parents 9e6520b + 273c396 commit e4c6f6b

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

46 files changed

+168
-106
lines changed

pythainlp/augment/wordnet.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
import itertools
1515
from collections import OrderedDict
16+
from typing import Optional
1617

1718
from nltk.corpus import wordnet as wn
1819

@@ -123,12 +124,12 @@ def __init__(self):
123124
pass
124125

125126
def find_synonyms(
126-
self, word: str, pos: str | None = None, postag_corpus: str = "orchid"
127+
self, word: str, pos: Optional[str] = None, postag_corpus: str = "orchid"
127128
) -> list[str]:
128129
"""Find synonyms using wordnet
129130
130131
:param str word: word
131-
:param str | None pos: part-of-speech type. Default is None.
132+
:param Optional[str] pos: part-of-speech type. Default is None.
132133
:param str postag_corpus: name of POS tag corpus
133134
:return: list of synonyms
134135
:rtype: List[str]

pythainlp/benchmarks/word_tokenization.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def _flatten_result(my_dict: dict, sep: str = ":") -> dict:
5454
:param str sep: separator between the two keys (default: ":")
5555
5656
:return: a one-dimension dictionary with keys combined
57-
:rtype: dict[str, float | str]
57+
:rtype: dict[str, Union[float, str]]
5858
"""
5959
return {
6060
f"{k1}{sep}{k2}": v
@@ -146,7 +146,7 @@ def compute_stats(ref_sample: str, raw_sample: str) -> dict:
146146
:param str samples: samples that we want to evaluate
147147
148148
:return: metrics at character- and word-level and indicators of correctly tokenized words
149-
:rtype: dict[str, float | str]
149+
:rtype: dict[str, Union[float, str]]
150150
"""
151151
ref_sample = _binary_representation(ref_sample)
152152
sample = _binary_representation(raw_sample)

pythainlp/classify/param_free.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import gzip
77
import json
8+
from typing import Optional
89

910
import numpy as np
1011

@@ -14,15 +15,15 @@ class GzipModel:
1415
“Low-Resource” Text Classification: A Parameter-Free Classification Method
1516
with Compressors (Jiang et al., Findings 2023)
1617
17-
:param list | None training_data: list [(text_sample,label)].
18+
:param Optional[list] training_data: list [(text_sample,label)].
1819
Default is None.
1920
:param str model_path: Path for loading model (if you saved the model).
2021
Default is empty string.
2122
"""
2223

2324
def __init__(
2425
self,
25-
training_data: list[tuple[str, str]] | None = None,
26+
training_data: Optional[list[tuple[str, str]]] = None,
2627
model_path: str = "",
2728
):
2829
if model_path:

pythainlp/corpus/common.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from __future__ import annotations
99

1010
import ast
11+
from typing import Union
1112

1213
__all__ = [
1314
"countries",
@@ -82,7 +83,7 @@ def countries() -> frozenset[str]:
8283
return _THAI_COUNTRIES
8384

8485

85-
def provinces(details: bool = False) -> frozenset[str] | list[dict]:
86+
def provinces(details: bool = False) -> Union[frozenset[str], list[dict]]:
8687
"""Return a frozenset of Thailand province names in Thai such as "กระบี่",
8788
"กรุงเทพมหานคร", "กาญจนบุรี", and "อุบลราชธานี".
8889
\n(See: `dev/pythainlp/corpus/thailand_provinces_th.txt\

pythainlp/corpus/core.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import tarfile
1313
import zipfile
1414
from importlib.resources import files
15+
from typing import Optional
1516

1617
from pythainlp import __version__
1718
from pythainlp.corpus import corpus_db_path, corpus_db_url, corpus_path
@@ -206,7 +207,7 @@ def get_corpus_as_is(filename: str) -> list:
206207
return lines
207208

208209

209-
def get_corpus_default_db(name: str, version: str = "") -> str | None:
210+
def get_corpus_default_db(name: str, version: str = "") -> Optional[str]:
210211
"""Get model path from default_db.json
211212
212213
:param str name: corpus name
@@ -236,7 +237,7 @@ def get_corpus_default_db(name: str, version: str = "") -> str | None:
236237
return None
237238

238239

239-
def get_corpus_path(name: str, version: str = "", force: bool = False) -> str | None:
240+
def get_corpus_path(name: str, version: str = "", force: bool = False) -> Optional[str]:
240241
"""Get corpus path.
241242
242243
:param str name: corpus name

pythainlp/corpus/wordnet.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212

1313
from __future__ import annotations
1414

15+
from typing import Optional
16+
1517
import nltk
1618

1719
try:
@@ -27,12 +29,12 @@
2729
from nltk.corpus import wordnet
2830

2931

30-
def synsets(word: str, pos: str | None = None, lang: str = "tha"):
32+
def synsets(word: str, pos: Optional[str] = None, lang: str = "tha"):
3133
"""This function returns the synonym set for all lemmas of the given word
3234
with an optional argument to constrain the part of speech of the word.
3335
3436
:param str word: word to find synsets of
35-
:param str | None pos: constraint of the part of speech (i.e. *n* for Noun, *v*
37+
:param Optional[str] pos: constraint of the part of speech (i.e. *n* for Noun, *v*
3638
for Verb, *a* for Adjective, *s* for Adjective
3739
satellites, and *r* for Adverb). Default is None.
3840
:param str lang: abbreviation of language (i.e. *eng*, *tha*).
@@ -98,12 +100,12 @@ def synset(name_synsets):
98100
return wordnet.synset(name_synsets)
99101

100102

101-
def all_lemma_names(pos: str | None = None, lang: str = "tha"):
103+
def all_lemma_names(pos: Optional[str] = None, lang: str = "tha"):
102104
"""This function returns all lemma names for all synsets of the given
103105
part of speech tag and language. If part of speech tag is not
104106
specified, all synsets of all parts of speech will be used.
105107
106-
:param str | None pos: constraint of the part of speech (i.e. *n* for Noun,
108+
:param Optional[str] pos: constraint of the part of speech (i.e. *n* for Noun,
107109
*v* for Verb, *a* for Adjective, *s* for
108110
Adjective satellites, and *r* for Adverb).
109111
By default, *pos* is **None**.
@@ -140,11 +142,11 @@ def all_lemma_names(pos: str | None = None, lang: str = "tha"):
140142
return wordnet.all_lemma_names(pos=pos, lang=lang)
141143

142144

143-
def all_synsets(pos: str | None = None):
145+
def all_synsets(pos: Optional[str] = None):
144146
"""This function iterates over all synsets constrained by the given
145147
part of speech tag.
146148
147-
:param str | None pos: part of speech tag. Default is None.
149+
:param Optional[str] pos: part of speech tag. Default is None.
148150
149151
:return: list of synsets constrained by the given part of speech tag.
150152
:rtype: Iterable[:class:`Synset`]
@@ -188,12 +190,12 @@ def langs():
188190
return wordnet.langs()
189191

190192

191-
def lemmas(word: str, pos: str | None = None, lang: str = "tha"):
193+
def lemmas(word: str, pos: Optional[str] = None, lang: str = "tha"):
192194
"""This function returns all lemmas given the word with an optional
193195
argument to constrain the part of speech of the word.
194196
195197
:param str word: word to find lemmas of
196-
:param str | None pos: constraint of the part of speech (i.e. *n* for Noun,
198+
:param Optional[str] pos: constraint of the part of speech (i.e. *n* for Noun,
197199
*v* for Verb, *a* for Adjective, *s* for
198200
Adjective satellites, and *r* for Adverb). Default is None.
199201
:param str lang: abbreviation of language (i.e. *eng*, *tha*).
@@ -391,12 +393,12 @@ def wup_similarity(synsets1, synsets2):
391393
return wordnet.wup_similarity(synsets1, synsets2)
392394

393395

394-
def morphy(form, pos: str | None = None):
396+
def morphy(form, pos: Optional[str] = None):
395397
"""This function finds a possible base form for the given form,
396398
with the given part of speech.
397399
398400
:param str form: the form to finds the base form of
399-
:param str | None pos: part of speech tag of words to be searched.
401+
:param Optional[str] pos: part of speech tag of words to be searched.
400402
Default is None.
401403
402404
:return: base form of the given form

pythainlp/el/core.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
# SPDX-License-Identifier: Apache-2.0
44
from __future__ import annotations
55

6+
from typing import Union
7+
68

79
class EntityLinker:
810
def __init__(
@@ -35,7 +37,7 @@ def __init__(
3537

3638
self.model = MultiEL(model_name=self.model_name, device=self.device)
3739

38-
def get_el(self, list_text: list[str] | str) -> list[dict] | str:
40+
def get_el(self, list_text: Union[list[str], str]) -> Union[list[dict], str]:
3941
"""Get Entity Linking from Thai Text
4042
4143
:param str Union[List[str], str]: list of Thai text or text

pythainlp/generate/core.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from __future__ import annotations
1111

1212
import random
13+
from typing import Union
1314

1415
from pythainlp.corpus.oscar import (
1516
unigram_word_freqs as oscar_word_freqs_unigram,
@@ -50,7 +51,7 @@ def gen_sentence(
5051
prob: float = 0.001,
5152
output_str: bool = True,
5253
duplicate: bool = False,
53-
) -> list[str] | str:
54+
) -> Union[list[str], str]:
5455
""":param str start_seq: word to begin sentence with
5556
:param int N: number of words
5657
:param bool output_str: output as string
@@ -143,7 +144,7 @@ def gen_sentence(
143144
prob: float = 0.001,
144145
output_str: bool = True,
145146
duplicate: bool = False,
146-
) -> list[str] | str:
147+
) -> Union[list[str], str]:
147148
""":param str start_seq: word to begin sentence with
148149
:param int N: number of words
149150
:param bool output_str: output as string
@@ -234,7 +235,7 @@ def gen_sentence(
234235
prob: float = 0.001,
235236
output_str: bool = True,
236237
duplicate: bool = False,
237-
) -> list[str] | str:
238+
) -> Union[list[str], str]:
238239
""":param str start_seq: word to begin sentence with
239240
:param int N: number of words
240241
:param bool output_str: output as string

pythainlp/generate/thai2fit.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
import pickle
1515
import random
16+
from typing import Union
1617

1718
# fastai
1819
import fastai
@@ -101,7 +102,7 @@ def gen_sentence(
101102
N: int = 4,
102103
prob: float = 0.001,
103104
output_str: bool = True,
104-
) -> list[str] | str:
105+
) -> Union[list[str], str]:
105106
"""Text generator using Thai2fit
106107
107108
:param str start_seq: word to begin sentence with

pythainlp/khavee/core.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
# ruff: noqa: C901
55
from __future__ import annotations
66

7+
from typing import List, Union
8+
79
from pythainlp import thai_consonants
810
from pythainlp.tokenize import subword_tokenize
911
from pythainlp.util import remove_tonemark, sound_syllable
@@ -381,7 +383,7 @@ def check_karu_lahu(self, text):
381383
else:
382384
return "lahu"
383385

384-
def check_klon(self, text: str, k_type: int = 8) -> list[str] | str:
386+
def check_klon(self, text: str, k_type: int = 8) -> Union[List[str], str]:
385387
"""
386388
Check the suitability of the poem according to Thai principles.
387389
@@ -625,8 +627,8 @@ def check_klon(self, text: str, k_type: int = 8) -> list[str] | str:
625627
return "Something went wrong. Make sure you enter it in the correct form."
626628

627629
def check_aek_too(
628-
self, text: list[str] | str, dead_syllable_as_aek: bool = False
629-
) -> list[bool] | list[str] | bool | str:
630+
self, text: Union[List[str], str], dead_syllable_as_aek: bool = False
631+
) -> Union[List[bool], List[str], bool, str]:
630632
"""
631633
Checker of Thai tonal words
632634

0 commit comments

Comments
 (0)