Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

___

`xTuring` makes it simple, fast, and cost‑efficient to fine‑tune open‑source LLMs (e.g., GPT‑OSS, LLaMA/LLaMA 2, Falcon, Qwen3, GPT‑J, GPT‑2, OPT, Bloom, Cerebras, Galactica) on your own data — locally or in your private cloud.
`xTuring` makes it simple, fast, and cost‑efficient to fine‑tune open‑source LLMs (e.g., GPT‑OSS, LLaMA/LLaMA 2, Mistral/Ministral, Falcon, Qwen3, GPT‑J, GPT‑2, OPT, Bloom, Cerebras, Galactica) on your own data — locally or in your private cloud.

Why xTuring:
- Simple API for data prep, training, and inference
Expand Down Expand Up @@ -276,6 +276,8 @@ Below is a list of all the supported models via `BaseModel` class of `xTuring` a
|GPT-2 | gpt2|
|LLaMA | llama|
|LLaMA2 | llama2|
|Mistral-7B | mistral_7b|
|Ministral 3 14B | ministral_3_14b|
|MiniMaxM2 | minimax_m2|
|OPT-1.3B | opt|

Expand Down
2 changes: 2 additions & 0 deletions docs/docs/overview/supported_models.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ Use one task notebook from `examples/notebooks/`, then choose a model key from t
| LLaMA | `llama` | `base`, `lora`, `int8`, `lora_int8`, `lora_kbit` |
| LLaMA 2 | `llama2` | `base`, `lora`, `int8`, `lora_int8`, `lora_kbit` |
| Mamba | `mamba` | `base` |
| Mistral 7B | `mistral_7b` | `base` |
| Ministral 3 14B | `ministral_3_14b` | `base`, `lora`, `int8`, `lora_int8`, `lora_kbit` |
| MiniMaxM2 | `minimax_m2` | `base`, `lora`, `int8`, `lora_int8`, `lora_kbit` |
| OPT 1.3B | `opt` | `base`, `lora`, `int8`, `lora_int8` |
| Qwen3 0.6B | `qwen3_0_6b` | `base`, `lora`, `int8`, `lora_int8`, `lora_kbit` |
Expand Down
52 changes: 52 additions & 0 deletions src/xturing/config/finetuning_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,58 @@ mamba:
learning_rate: 5e-5
weight_decay: 0.01

# Mistral 7B fine-tuning settings. Only these four keys are set here; anything
# else presumably falls back to defaults defined elsewhere — confirm.
mistral_7b:
learning_rate: 5e-5
weight_decay: 0.01
num_train_epochs: 3
optimizer_name: cpu_adam

# Ministral 3 14B fine-tuning settings, one entry per variant
# (base, LoRA, int8, LoRA + int8, LoRA + k-bit).
# batch_size * gradient_accumulation_steps equals 8 in every entry below.
ministral_3_14b:
learning_rate: 1e-5
weight_decay: 0.01
num_train_epochs: 1
batch_size: 1
gradient_accumulation_steps: 8
max_length: 2048
warmup_steps: 100

ministral_3_14b_lora:
learning_rate: 2e-4
weight_decay: 0.01
num_train_epochs: 3
batch_size: 2
gradient_accumulation_steps: 4
max_length: 2048
warmup_steps: 100

ministral_3_14b_int8:
learning_rate: 1e-4
weight_decay: 0.01
num_train_epochs: 2
batch_size: 2
gradient_accumulation_steps: 4
max_length: 2048
warmup_steps: 100

ministral_3_14b_lora_int8:
learning_rate: 2e-4
weight_decay: 0.01
num_train_epochs: 3
batch_size: 4
gradient_accumulation_steps: 2
max_length: 2048
warmup_steps: 100

ministral_3_14b_lora_kbit:
learning_rate: 2e-4
weight_decay: 0.01
num_train_epochs: 3
batch_size: 8
gradient_accumulation_steps: 1
max_length: 2048
warmup_steps: 100

# MiniMaxM2 model fine-tuning configurations
minimax_m2:
learning_rate: 1e-5
Expand Down
37 changes: 37 additions & 0 deletions src/xturing/config/generation_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,43 @@ llama2_lora_kbit:
mamba:
do_sample: false

# Mistral 7B: contrastive search (penalty_alpha + top_k, sampling disabled)
mistral_7b:
penalty_alpha: 0.6
top_k: 4
max_new_tokens: 256
do_sample: false

# Ministral 3 14B. The base and LoRA entries set penalty_alpha/top_k
# (contrastive search); the int8, LoRA-int8 and LoRA-k-bit entries omit them.
# NOTE(review): every entry sets temperature together with do_sample: false;
# temperature is normally a sampling-only knob, so it may be ignored — confirm
# against the generation backend.
ministral_3_14b:
penalty_alpha: 0.6
top_k: 4
max_new_tokens: 512
do_sample: false
temperature: 0.1

ministral_3_14b_lora:
penalty_alpha: 0.6
top_k: 4
max_new_tokens: 512
do_sample: false
temperature: 0.1

ministral_3_14b_int8:
max_new_tokens: 512
do_sample: false
temperature: 0.1

ministral_3_14b_lora_int8:
max_new_tokens: 512
do_sample: false
temperature: 0.1

ministral_3_14b_lora_kbit:
max_new_tokens: 512
do_sample: false
temperature: 0.1

# Contrastive search for MiniMaxM2
minimax_m2:
penalty_alpha: 0.6
Expand Down
18 changes: 18 additions & 0 deletions src/xturing/engines/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,14 @@
MiniMaxM2LoraInt8Engine,
MiniMaxM2LoraKbitEngine,
)
from xturing.engines.mistral_engine import (
Ministral314BEngine,
Ministral314BInt8Engine,
Ministral314BLoraEngine,
Ministral314BLoraInt8Engine,
Ministral314BLoraKbitEngine,
Mistral7BEngine,
)
from xturing.engines.opt_engine import (
OPTEngine,
OPTInt8Engine,
Expand Down Expand Up @@ -149,6 +157,16 @@
BaseEngine.add_to_registry(LLama2LoraInt8Engine.config_name, LLama2LoraInt8Engine)
BaseEngine.add_to_registry(LLama2LoraKbitEngine.config_name, LLama2LoraKbitEngine)
BaseEngine.add_to_registry(MambaEngine.config_name, MambaEngine)
BaseEngine.add_to_registry(Mistral7BEngine.config_name, Mistral7BEngine)
BaseEngine.add_to_registry(Ministral314BEngine.config_name, Ministral314BEngine)
BaseEngine.add_to_registry(Ministral314BInt8Engine.config_name, Ministral314BInt8Engine)
BaseEngine.add_to_registry(Ministral314BLoraEngine.config_name, Ministral314BLoraEngine)
BaseEngine.add_to_registry(
Ministral314BLoraInt8Engine.config_name, Ministral314BLoraInt8Engine
)
BaseEngine.add_to_registry(
Ministral314BLoraKbitEngine.config_name, Ministral314BLoraKbitEngine
)
BaseEngine.add_to_registry(MiniMaxM2Engine.config_name, MiniMaxM2Engine)
BaseEngine.add_to_registry(MiniMaxM2Int8Engine.config_name, MiniMaxM2Int8Engine)
BaseEngine.add_to_registry(MiniMaxM2LoraEngine.config_name, MiniMaxM2LoraEngine)
Expand Down
99 changes: 99 additions & 0 deletions src/xturing/engines/mistral_engine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
from pathlib import Path
from typing import Optional, Union

from xturing.engines.causal import CausalEngine, CausalLoraEngine, CausalLoraKbitEngine

# Model identifiers handed to the engine base classes as `model_name`
# (they look like Hugging Face Hub repo ids — confirm).
_MISTRAL_MODEL_NAME = "mistralai/Mistral-7B-v0.1"
_MINISTRAL_MODEL_NAME = "mistralai/Ministral-3-14B-Instruct-2512"
# Module names passed as `target_modules` by every Ministral LoRA engine in
# this file (plain, int8 and k-bit), so all three receive the same list.
_MINISTRAL_TARGET_MODULES = [
"q_proj",
"k_proj",
"v_proj",
"o_proj",
"gate_proj",
"up_proj",
"down_proj",
]


class Mistral7BEngine(CausalEngine):
    """Causal engine bound to the model named by ``_MISTRAL_MODEL_NAME``.

    Once the base engine has been initialised, the tokenizer's padding token
    (and its id) are set to the end-of-sequence token (and its id).
    """

    config_name: str = "mistral_7b_engine"

    def __init__(self, weights_path: Optional[Union[str, Path]] = None):
        """Initialise the engine.

        Args:
            weights_path: Forwarded as-is to ``CausalEngine.__init__``.
        """
        base_kwargs = {
            "model_name": _MISTRAL_MODEL_NAME,
            "weights_path": weights_path,
        }
        super().__init__(**base_kwargs)

        # Pad with EOS: copy both the token and its id.
        tok = self.tokenizer
        tok.pad_token = tok.eos_token
        tok.pad_token_id = tok.eos_token_id


class Ministral314BEngine(CausalEngine):
    """Causal engine bound to the model named by ``_MINISTRAL_MODEL_NAME``.

    Once the base engine has been initialised, the tokenizer's padding token
    (and its id) are set to the end-of-sequence token (and its id).
    """

    config_name: str = "ministral_3_14b_engine"

    def __init__(self, weights_path: Optional[Union[str, Path]] = None):
        """Initialise the engine.

        Args:
            weights_path: Forwarded as-is to ``CausalEngine.__init__``.
        """
        base_kwargs = {
            "model_name": _MINISTRAL_MODEL_NAME,
            "weights_path": weights_path,
        }
        super().__init__(**base_kwargs)

        # Pad with EOS: copy both the token and its id.
        tok = self.tokenizer
        tok.pad_token = tok.eos_token
        tok.pad_token_id = tok.eos_token_id


class Ministral314BLoraEngine(CausalLoraEngine):
    """LoRA engine bound to the model named by ``_MINISTRAL_MODEL_NAME``.

    ``_MINISTRAL_TARGET_MODULES`` is passed to the base class as
    ``target_modules``. After base initialisation, the tokenizer's padding
    token (and its id) are set to the end-of-sequence token (and its id).
    """

    config_name: str = "ministral_3_14b_lora_engine"

    def __init__(self, weights_path: Optional[Union[str, Path]] = None):
        """Initialise the engine.

        Args:
            weights_path: Forwarded as-is to ``CausalLoraEngine.__init__``.
        """
        base_kwargs = {
            "model_name": _MINISTRAL_MODEL_NAME,
            "weights_path": weights_path,
            "target_modules": _MINISTRAL_TARGET_MODULES,
        }
        super().__init__(**base_kwargs)

        # Pad with EOS: copy both the token and its id.
        tok = self.tokenizer
        tok.pad_token = tok.eos_token
        tok.pad_token_id = tok.eos_token_id


class Ministral314BInt8Engine(CausalEngine):
    """Causal engine for ``_MINISTRAL_MODEL_NAME`` created with ``load_8bit=True``.

    After base initialisation, the tokenizer's padding token (and its id) are
    set to the end-of-sequence token (and its id).
    """

    config_name: str = "ministral_3_14b_int8_engine"

    def __init__(self, weights_path: Optional[Union[str, Path]] = None):
        """Initialise the engine.

        Args:
            weights_path: Forwarded as-is to ``CausalEngine.__init__``.
        """
        base_kwargs = {
            "model_name": _MINISTRAL_MODEL_NAME,
            "weights_path": weights_path,
            "load_8bit": True,
        }
        super().__init__(**base_kwargs)

        # Pad with EOS: copy both the token and its id.
        tok = self.tokenizer
        tok.pad_token = tok.eos_token
        tok.pad_token_id = tok.eos_token_id


class Ministral314BLoraInt8Engine(CausalLoraEngine):
    """LoRA engine for ``_MINISTRAL_MODEL_NAME`` created with ``load_8bit=True``.

    ``_MINISTRAL_TARGET_MODULES`` is passed to the base class as
    ``target_modules``. After base initialisation, the tokenizer's padding
    token (and its id) are set to the end-of-sequence token (and its id).
    """

    config_name: str = "ministral_3_14b_lora_int8_engine"

    def __init__(self, weights_path: Optional[Union[str, Path]] = None):
        """Initialise the engine.

        Args:
            weights_path: Forwarded as-is to ``CausalLoraEngine.__init__``.
        """
        base_kwargs = {
            "model_name": _MINISTRAL_MODEL_NAME,
            "weights_path": weights_path,
            "load_8bit": True,
            "target_modules": _MINISTRAL_TARGET_MODULES,
        }
        super().__init__(**base_kwargs)

        # Pad with EOS: copy both the token and its id.
        tok = self.tokenizer
        tok.pad_token = tok.eos_token
        tok.pad_token_id = tok.eos_token_id


class Ministral314BLoraKbitEngine(CausalLoraKbitEngine):
    """K-bit LoRA engine bound to the model named by ``_MINISTRAL_MODEL_NAME``.

    ``_MINISTRAL_TARGET_MODULES`` is passed to the base class as
    ``target_modules``. After base initialisation, the tokenizer's padding
    token (and its id) are set to the end-of-sequence token (and its id).
    """

    config_name: str = "ministral_3_14b_lora_kbit_engine"

    def __init__(self, weights_path: Optional[Union[str, Path]] = None):
        """Initialise the engine.

        Args:
            weights_path: Forwarded as-is to ``CausalLoraKbitEngine.__init__``.
        """
        # NOTE(review): no explicit quantisation flag is passed here; this
        # relies on CausalLoraKbitEngine's own defaults — confirm that is intended.
        base_kwargs = {
            "model_name": _MINISTRAL_MODEL_NAME,
            "weights_path": weights_path,
            "target_modules": _MINISTRAL_TARGET_MODULES,
        }
        super().__init__(**base_kwargs)

        # Pad with EOS: copy both the token and its id.
        tok = self.tokenizer
        tok.pad_token = tok.eos_token
        tok.pad_token_id = tok.eos_token_id
14 changes: 14 additions & 0 deletions src/xturing/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,14 @@
MiniMaxM2LoraInt8,
MiniMaxM2LoraKbit,
)
from xturing.models.mistral import (
Ministral314B,
Ministral314BInt8,
Ministral314BLora,
Ministral314BLoraInt8,
Ministral314BLoraKbit,
Mistral7B,
)
from xturing.models.opt import OPT, OPTInt8, OPTLora, OPTLoraInt8
from xturing.models.qwen import (
Qwen3,
Expand Down Expand Up @@ -126,6 +134,12 @@
BaseModel.add_to_registry(Llama2LoraInt8.config_name, Llama2LoraInt8)
BaseModel.add_to_registry(Llama2LoraKbit.config_name, Llama2LoraKbit)
BaseModel.add_to_registry(Mamba.config_name, Mamba)
BaseModel.add_to_registry(Mistral7B.config_name, Mistral7B)
BaseModel.add_to_registry(Ministral314B.config_name, Ministral314B)
BaseModel.add_to_registry(Ministral314BInt8.config_name, Ministral314BInt8)
BaseModel.add_to_registry(Ministral314BLora.config_name, Ministral314BLora)
BaseModel.add_to_registry(Ministral314BLoraInt8.config_name, Ministral314BLoraInt8)
BaseModel.add_to_registry(Ministral314BLoraKbit.config_name, Ministral314BLoraKbit)
BaseModel.add_to_registry(MiniMaxM2.config_name, MiniMaxM2)
BaseModel.add_to_registry(MiniMaxM2Int8.config_name, MiniMaxM2Int8)
BaseModel.add_to_registry(MiniMaxM2Lora.config_name, MiniMaxM2Lora)
Expand Down
59 changes: 59 additions & 0 deletions src/xturing/models/mistral.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from typing import Optional

from xturing.engines.mistral_engine import (
Ministral314BEngine,
Ministral314BInt8Engine,
Ministral314BLoraEngine,
Ministral314BLoraInt8Engine,
Ministral314BLoraKbitEngine,
Mistral7BEngine,
)
from xturing.models.causal import (
CausalInt8Model,
CausalLoraInt8Model,
CausalLoraKbitModel,
CausalLoraModel,
CausalModel,
)


class Mistral7B(CausalModel):
    """Model with key ``mistral_7b``, backed by ``Mistral7BEngine``."""

    config_name: str = "mistral_7b"

    def __init__(self, weights_path: Optional[str] = None):
        """Initialise the model.

        Args:
            weights_path: Forwarded as-is to ``CausalModel.__init__``.
        """
        engine_key = Mistral7BEngine.config_name
        super().__init__(engine_key, weights_path)


class Ministral314B(CausalModel):
    """Model with key ``ministral_3_14b``, backed by ``Ministral314BEngine``."""

    config_name: str = "ministral_3_14b"

    def __init__(self, weights_path: Optional[str] = None):
        """Initialise the model.

        Args:
            weights_path: Forwarded as-is to ``CausalModel.__init__``.
        """
        engine_key = Ministral314BEngine.config_name
        super().__init__(engine_key, weights_path)


class Ministral314BLora(CausalLoraModel):
    """Model with key ``ministral_3_14b_lora``, backed by ``Ministral314BLoraEngine``."""

    config_name: str = "ministral_3_14b_lora"

    def __init__(self, weights_path: Optional[str] = None):
        """Initialise the model.

        Args:
            weights_path: Forwarded as-is to ``CausalLoraModel.__init__``.
        """
        engine_key = Ministral314BLoraEngine.config_name
        super().__init__(engine_key, weights_path)


class Ministral314BInt8(CausalInt8Model):
    """Model with key ``ministral_3_14b_int8``, backed by ``Ministral314BInt8Engine``."""

    config_name: str = "ministral_3_14b_int8"

    def __init__(self, weights_path: Optional[str] = None):
        """Initialise the model.

        Args:
            weights_path: Forwarded as-is to ``CausalInt8Model.__init__``.
        """
        engine_key = Ministral314BInt8Engine.config_name
        super().__init__(engine_key, weights_path)


class Ministral314BLoraInt8(CausalLoraInt8Model):
    """Model with key ``ministral_3_14b_lora_int8``, backed by ``Ministral314BLoraInt8Engine``."""

    config_name: str = "ministral_3_14b_lora_int8"

    def __init__(self, weights_path: Optional[str] = None):
        """Initialise the model.

        Args:
            weights_path: Forwarded as-is to ``CausalLoraInt8Model.__init__``.
        """
        engine_key = Ministral314BLoraInt8Engine.config_name
        super().__init__(engine_key, weights_path)


class Ministral314BLoraKbit(CausalLoraKbitModel):
    """Model with key ``ministral_3_14b_lora_kbit``, backed by ``Ministral314BLoraKbitEngine``."""

    config_name: str = "ministral_3_14b_lora_kbit"

    def __init__(self, weights_path: Optional[str] = None):
        """Initialise the model.

        Args:
            weights_path: Forwarded as-is to ``CausalLoraKbitModel.__init__``.
        """
        engine_key = Ministral314BLoraKbitEngine.config_name
        super().__init__(engine_key, weights_path)
Loading