sagorbrur · sagorbrur · Apr 1, 2026 · Apr 1, 2026 · Apr 1, 2026 · Apr 1, 2026
diff --git a/.github/workflows/bnlp-publish-auto.yml b/.github/workflows/bnlp-publish-auto.yml
@@ -21,9 +21,9 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - name: Set up Python
-      uses: actions/setup-python@v3
+      uses: actions/setup-python@v5
       with:
         python-version: '3.x'
     - name: Install dependencies

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
@@ -5,7 +5,7 @@ name: Building and Testing
 
 on:
   push:
-    branches: [ "main" ]
+    branches: [ "main", "upgrade_for_python12" ]
   pull_request:
     branches: [ "main" ]
 
@@ -17,15 +17,12 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
-        exclude:
-        - os: ubuntu-latest
-          python-version: "3.6"
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
 
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v3
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies

diff --git a/README.md b/README.md
@@ -42,7 +42,7 @@ BNLP is a natural language processing toolkit for Bengali Language. This tool wi
   ```
   pip install -U bnlp_toolkit
   ```
-  - Python: 3.8, 3.9, 3.10, 3.11
+  - Python: 3.9, 3.10, 3.11, 3.12, 3.13
   - OS: Linux, Windows, Mac
 
 ### Build from source

diff --git a/bnlp/cleantext/clean.py b/bnlp/cleantext/clean.py
@@ -8,7 +8,7 @@
 
 from ftfy import fix_text
 from unicodedata import category, normalize
-from emoji import UNICODE_EMOJI, demojize, emojize
+import emoji
 
 def fix_bad_unicode(text, normalization="NFC"):
     return fix_text(text, normalization=normalization)
@@ -51,7 +51,7 @@ def remove_substrings(text, to_replace, replace_with=""):
     return result
 
 def remove_emoji(text):
-    return remove_substrings(text, UNICODE_EMOJI["en"])
+    return emoji.replace_emoji(text, replace="")
 
 def remove_number_or_digit(text, replace_with=""):
     return re.sub(constants.BANGLA_DIGIT_REGEX, replace_with, text)

diff --git a/bnlp/embedding/glove.py b/bnlp/embedding/glove.py
@@ -1,4 +1,3 @@
-import scipy
 import numpy as np
 from typing import List
 from scipy import spatial

diff --git a/bnlp/embedding/word2vec.py b/bnlp/embedding/word2vec.py
@@ -1,8 +1,6 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-from __future__ import print_function
-
 import warnings
 warnings.filterwarnings("ignore")
 

diff --git a/bnlp/tokenizer/basic.py b/bnlp/tokenizer/basic.py
@@ -4,28 +4,17 @@
 Code shamelessly copied from BERT tokenization
 To check Original Code: https://github.com/google-research/bert/blob/master/tokenization.py
 """
-import six
 import unicodedata
 from typing import List
 
 def convert_to_unicode(text):
     """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
-    if six.PY3:
-        if isinstance(text, str):
-            return text
-        elif isinstance(text, bytes):
-            return text.decode("utf-8", "ignore")
-        else:
-            raise ValueError("Unsupported string type: %s" % (type(text)))
-    elif six.PY2:
-        if isinstance(text, str):
-            return text.decode("utf-8", "ignore")
-        elif isinstance(text, unicode):
-            return text
-        else:
-            raise ValueError("Unsupported string type: %s" % (type(text)))
+    if isinstance(text, str):
+        return text
+    elif isinstance(text, bytes):
+        return text.decode("utf-8", "ignore")
     else:
-        raise ValueError("Not running on Python2 or Python 3?")
+        raise ValueError("Unsupported string type: %s" % (type(text)))
 
 
 def whitespace_tokenize(text: str) -> List[str]:

diff --git a/docs/README.md b/docs/README.md
@@ -59,7 +59,7 @@ Table of contents
   ```
   pip install -U bnlp_toolkit
   ```
-  - Python: 3.6, 3.7, 3.8, 3.9, 3.10
+  - Python: 3.9, 3.10, 3.11, 3.12, 3.13
   - OS: Linux, Windows, Mac
 
 

diff --git a/docs/REFACTORING_ANALYSIS.md b/docs/REFACTORING_ANALYSIS.md
@@ -443,25 +443,26 @@ $ bnlp download all
 
 | Dependency | Pinned | Latest | Risk |
 |------------|--------|--------|------|
-| `scipy==1.10.1` | Yes | 1.11+ | Security patches |
-| `gensim==4.3.2` | Yes | 4.3.3+ | Bug fixes |
-| `emoji==1.7.0` | Yes | 2.x | Breaking changes (API changed) |
-| `sklearn-crfsuite==0.3.6` | Yes | 0.5+ | Compatibility |
+| `scipy>=1.11.0` | No | 1.13+ | Updated for Python 3.12+ |
+| `gensim>=4.3.3` | No | 4.4+ | Updated for scipy compatibility |
+| `emoji>=2.0.0` | No | 2.15+ | Updated, code migrated to new API |
+| `sklearn-crfsuite>=0.5.0` | No | 0.5+ | Updated for Python 3.12+ |
 
 ### 6.2 Recommended Approach
 
 ```python
 install_requires=[
-    "sentencepiece>=0.2.0,<0.3.0",
-    "gensim>=4.3.0,<5.0.0",
-    "nltk>=3.8",
-    "numpy>=1.21",
-    "scipy>=1.10.0,<2.0.0",
-    "sklearn-crfsuite>=0.3.6,<1.0.0",
-    "tqdm>=4.60.0",
-    "ftfy>=6.0.0",
-    "emoji>=1.7.0,<2.0.0",  # Note: emoji 2.x has breaking changes
-    "requests>=2.25.0",
+    "sentencepiece>=0.2.0",
+    "gensim>=4.3.3",
+    "nltk",
+    "numpy",
+    "scipy>=1.11.0",
+    "sklearn-crfsuite>=0.5.0",
+    "tqdm>=4.66.3",
+    "ftfy>=6.2.0",
+    "emoji>=2.0.0",
+    "requests",
+    "symspellpy>=6.7.0",
 ],
 ```
 

diff --git a/docs/index.rst b/docs/index.rst
@@ -112,7 +112,7 @@ PIP installer
 
    pip install -U bnlp_toolkit
 
--  Python: 3.6, 3.7, 3.8, 3.9, 3.10
+-  Python: 3.9, 3.10, 3.11, 3.12, 3.13
 -  OS: Linux, Windows, Mac
 
 Pretrained Model

diff --git a/requirements.txt b/requirements.txt
@@ -1,11 +1,11 @@
-sentencepiece==0.2.0
-gensim==4.3.2
+sentencepiece>=0.2.0
+gensim>=4.3.3
 numpy
-scipy==1.10.1
-sklearn-crfsuite==0.3.6
-tqdm==4.66.3
-ftfy==6.2.0
-emoji==1.7.0
+scipy>=1.11.0
+sklearn-crfsuite>=0.5.0
+tqdm>=4.66.3
+ftfy>=6.2.0
+emoji>=2.0.0
 requests
 nltk
 symspellpy>=6.7.0
diff --git a/setup.py b/setup.py
@@ -4,7 +4,7 @@
 
 setuptools.setup(
     name="bnlp_toolkit",
-    version="4.4.0",
+    version="4.4.1",
     author="Sagor Sarker",
     author_email="sagorhem3532@gmail.com",
     description="BNLP is a natural language processing toolkit for Bengali Language",
@@ -18,17 +18,17 @@
         "License :: OSI Approved :: MIT License",
         "Operating System :: OS Independent",
     ],
-    python_requires=">=3.6",
+    python_requires=">=3.8",
     install_requires=[
-        "sentencepiece==0.2.0",
-        "gensim==4.3.2",
+        "sentencepiece>=0.2.0",
+        "gensim>=4.3.3",
         "nltk",
         "numpy",
-        "scipy==1.10.1",
-        "sklearn-crfsuite==0.3.6",
-        "tqdm==4.66.3",
-        "ftfy==6.2.0",
-        "emoji==1.7.0",
+        "scipy>=1.11.0",
+        "sklearn-crfsuite>=0.5.0",
+        "tqdm>=4.66.3",
+        "ftfy>=6.2.0",
+        "emoji>=2.0.0",
         "requests",
         "symspellpy>=6.7.0",
     ],

diff --git a/tests/__init__.py b/tests/__init__.py
diff --git a/tests/embedding/__init__.py b/tests/embedding/__init__.py
diff --git a/tests/token_classification/__init__.py b/tests/token_classification/__init__.py
diff --git a/tests/tokenizer/__init__.py b/tests/tokenizer/__init__.py