Skip to content

Commit 152bcc5

Browse files
committed
fix: guard nighit, check_sara, check_marttra against empty input
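nighit() crashed with IndexError when w2 contained no Thai consonants; it now raises ValueError with a clear error message instead. check_sara("") and check_marttra("") crashed accessing word[-1]; both now return an empty string instead: check_sara when its input is empty, and check_marttra when the word is empty after karun handling and tone-mark removal.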
1 parent 01b0a86 commit 152bcc5

File tree

2 files changed

+12
-1
lines changed

2 files changed

+12
-1
lines changed

pythainlp/khavee/core.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ def check_sara(self, word: str) -> str:
5454
sara = []
5555
countoa = 0
5656

57+
if not word:
58+
return ""
59+
5760
# In case of การันย์
5861
if "์" in word[-1]:
5962
word = word[:-2]
@@ -253,6 +256,9 @@ def check_marttra(self, word: str) -> str:
253256
word = self.handle_karun_sound_silence(word)
254257
word = remove_tonemark(word)
255258

259+
if not word:
260+
return ""
261+
256262
# Check for ำ at the end (represents "am" sound, ends with m)
257263
if word[-1] == "ำ":
258264
return "กม"

pythainlp/morpheme/word_formation.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,12 @@ def nighit(w1: str, w2: str) -> str:
3838
newword = []
3939
newword.append(list_w1[0])
4040
newword.append("ั")
41-
consonant_start = [i for i in list_w2 if i in set(thai_consonants)][0]
41+
consonants_in_w2 = [i for i in list_w2 if i in set(thai_consonants)]
42+
if not consonants_in_w2:
43+
raise ValueError(
44+
f"w2 '{w2}' contains no Thai consonants."
45+
)
46+
consonant_start = consonants_in_w2[0]
4247
if consonant_start in ["ก", "ช", "ค", "ข", "ง"]:
4348
newword.append("ง")
4449
elif consonant_start in ["จ", "ฉ", "ช", "ฌ"]:

0 commit comments

Comments
 (0)