Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 135 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ BNLP is a natural language processing toolkit for Bengali Language. This tool wi
- [Corpus](./docs/README.md#bengali-corpus-class)
- Letters, vowels, punctuations, stopwords
- [Command Line Interface (CLI)](#command-line-interface)
- [Pipeline API](#pipeline-api)
- [Batch Processing](#batch-processing)
- [Async Model Loading](#async-model-loading)

## Installation

Expand Down Expand Up @@ -128,6 +131,138 @@ bnlp tokenize "আমি বাংলায় গান গাই। তুম
bnlp embedding "বাংলা" --similar --topn 5
```

## Pipeline API

Chain multiple NLP operations together using the Pipeline API.

```python
from bnlp import Pipeline, CleanText, BasicTokenizer

# Create a pipeline
pipeline = Pipeline([
    CleanText(remove_url=True, remove_punct=True),
    BasicTokenizer(),
])

# Process text through the pipeline
result = pipeline("আমি বাংলায় গান গাই।")
print(result)
# Output: ['আমি', 'বাংলায়', 'গান', 'গাই']

# Get detailed results with intermediate outputs
result = pipeline.run("আমি বাংলায় গান গাই।", return_details=True)
print(result.intermediate_results)
```

### Pre-built Pipelines

```python
from bnlp import create_tokenization_pipeline, create_ner_pipeline, create_pos_pipeline

# Tokenization pipeline
tokenizer_pipeline = create_tokenization_pipeline(clean=True, tokenizer_type="basic")
tokens = tokenizer_pipeline("আমি বাংলায় গান গাই।")

# NER pipeline
ner_pipeline = create_ner_pipeline(clean=True)
entities = ner_pipeline("সজীব ঢাকায় থাকেন।")

# POS pipeline
pos_pipeline = create_pos_pipeline(clean=True)
tags = pos_pipeline("আমি ভাত খাই।")
```

## Batch Processing

Process multiple texts efficiently using batch processing utilities.

```python
from bnlp import BasicTokenizer, tokenize_batch, tag_batch, clean_batch
from bnlp import BengaliNER, CleanText

# Batch tokenization
tokenizer = BasicTokenizer()
texts = ["আমি বাংলায় গান গাই।", "তুমি কোথায় যাও?", "সে বই পড়ে।"]
results = tokenize_batch(tokenizer.tokenize, texts)
print(results)
# Output: [['আমি', 'বাংলায়', ...], ['তুমি', 'কোথায়', ...], ['সে', 'বই', ...]]

# Batch NER tagging
ner = BengaliNER()
texts = ["সজীব ঢাকায় থাকেন।", "রবীন্দ্রনাথ ঠাকুর কলকাতায় জন্মগ্রহণ করেন।"]
results = tag_batch(ner.tag, texts)

# Batch text cleaning
cleaner = CleanText(remove_url=True, remove_email=True)
texts = ["email@example.com আমি", "https://example.com তুমি"]
results = clean_batch(cleaner, texts)
```

### Using BatchProcessor

```python
from bnlp import BatchProcessor, BasicTokenizer

tokenizer = BasicTokenizer()
batch = BatchProcessor(tokenizer.tokenize, max_workers=4)

texts = ["আমি বাংলায় গান গাই।"] * 100
results = batch.process(texts, show_progress=True)
```

## Async Model Loading

Load large models in the background without blocking your application.

```python
from bnlp import AsyncModelLoader, BengaliWord2Vec

# Create async loader with callbacks
def on_progress(progress):
    print(f"Loading: {progress.progress * 100:.0f}% - {progress.message}")

loader = AsyncModelLoader(
    BengaliWord2Vec,
    on_progress=on_progress,
    on_complete=lambda m: print("Model ready!")
)

# Start loading in background
loader.start_loading()

# Do other work while model loads...
print("Doing other work...")

# Get model when needed (blocks until ready)
model = loader.get_model()
vector = model.get_word_vector("বাংলা")
```

### Lazy Loading

```python
from bnlp import LazyModelLoader, BengaliWord2Vec

# Model not loaded yet
lazy_model = LazyModelLoader(BengaliWord2Vec)

# Model loads on first access
model = lazy_model.get()
vector = model.get_word_vector("বাংলা")
```

### Quick Async Loading

```python
from bnlp import load_model_async, BengaliWord2Vec

# One-liner to start async loading
loader = load_model_async(BengaliWord2Vec)

# Get model when ready
model = loader.get_model()
```

## Documentation
Full documentation is available [here](https://sagorbrur.github.io/bnlp/)

Expand Down
31 changes: 28 additions & 3 deletions bnlp/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@

__version__ = "4.0.3"
__version__ = "4.1.0"

import os
from bnlp.tokenizer.basic import BasicTokenizer
from bnlp.tokenizer.nltk import NLTKTokenizer
from bnlp.tokenizer.sentencepiece import (
SentencepieceTokenizer,
SentencepieceTokenizer,
SentencepieceTrainer,
)

Expand All @@ -16,7 +16,7 @@
from bnlp.embedding.glove import BengaliGlove

from bnlp.embedding.doc2vec import (
BengaliDoc2vec,
BengaliDoc2vec,
BengaliDoc2vecTrainer,
)

Expand All @@ -27,3 +27,28 @@
from bnlp.cleantext.clean import CleanText

from bnlp.corpus.corpus import BengaliCorpus

# Core module - Protocols, Pipeline, Exceptions, Batch Processing, Async Loading
from bnlp.core import (
    # Pipeline
    Pipeline,
    PipelineStep,
    PipelineResult,
    create_tokenization_pipeline,
    create_ner_pipeline,
    create_pos_pipeline,
    # Batch Processing
    BatchProcessor,
    tokenize_batch,
    embed_batch,
    tag_batch,
    clean_batch,
    # Async Loading
    AsyncModelLoader,
    LazyModelLoader,
    load_model_async,
    # Exceptions
    BNLPException,
    ModelNotFoundError,
    ModelLoadError,
)
98 changes: 98 additions & 0 deletions bnlp/core/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
"""
BNLP Core Module

This module provides core abstractions, protocols, and utilities for BNLP.
"""

from bnlp.core.protocols import (
    TokenizerProtocol,
    BatchTokenizerProtocol,
    EmbeddingProtocol,
    BatchEmbeddingProtocol,
    SimilarityEmbeddingProtocol,
    DocumentEmbeddingProtocol,
    TaggerProtocol,
    BatchTaggerProtocol,
    TextProcessorProtocol,
    PipelineStepProtocol,
)

from bnlp.core.pipeline import (
    Pipeline,
    PipelineStep,
    PipelineResult,
    create_tokenization_pipeline,
    create_ner_pipeline,
    create_pos_pipeline,
)

from bnlp.core.exceptions import (
    BNLPException,
    ModelNotFoundError,
    ModelLoadError,
    TokenizationError,
    EmbeddingError,
    TaggingError,
    DownloadError,
    PipelineError,
    InvalidInputError,
)

from bnlp.core.batch import (
    BatchProcessor,
    tokenize_batch,
    embed_batch,
    tag_batch,
    clean_batch,
)

from bnlp.core.async_loader import (
    AsyncModelLoader,
    LazyModelLoader,
    LoadingStatus,
    LoadingProgress,
    load_model_async,
)

__all__ = [
    # Protocols
    "TokenizerProtocol",
    "BatchTokenizerProtocol",
    "EmbeddingProtocol",
    "BatchEmbeddingProtocol",
    "SimilarityEmbeddingProtocol",
    "DocumentEmbeddingProtocol",
    "TaggerProtocol",
    "BatchTaggerProtocol",
    "TextProcessorProtocol",
    "PipelineStepProtocol",
    # Pipeline
    "Pipeline",
    "PipelineStep",
    "PipelineResult",
    "create_tokenization_pipeline",
    "create_ner_pipeline",
    "create_pos_pipeline",
    # Exceptions
    "BNLPException",
    "ModelNotFoundError",
    "ModelLoadError",
    "TokenizationError",
    "EmbeddingError",
    "TaggingError",
    "DownloadError",
    "PipelineError",
    "InvalidInputError",
    # Batch Processing
    "BatchProcessor",
    "tokenize_batch",
    "embed_batch",
    "tag_batch",
    "clean_batch",
    # Async Loading
    "AsyncModelLoader",
    "LazyModelLoader",
    "LoadingStatus",
    "LoadingProgress",
    "load_model_async",
]
Loading
Loading