support kontext inference (#114)

qzzz95 · web-flow · commit ac82a1c2abc0 · 2025-07-08T17:13:21.000+08:00
* support kontext inference

* fix kontext unittest

* fix
diff --git a/diffsynth_engine/pipelines/controlnet_helper.py b/diffsynth_engine/pipelines/controlnet_helper.py
@@ -8,8 +8,8 @@
 
 @dataclass
 class ControlNetParams:
-    scale: float
     image: ImageType
+    scale: float = 1.0
     model: Optional[nn.Module] = None
     mask: Optional[ImageType] = None
     control_start: float = 0
diff --git a/diffsynth_engine/pipelines/flux_image.py b/diffsynth_engine/pipelines/flux_image.py
@@ -419,9 +419,10 @@ class ControlType(Enum):
     normal = "normal"
     bfl_control = "bfl_control"
     bfl_fill = "bfl_fill"
+    bfl_kontext = "bfl_kontext"
 
     def get_in_channel(self):
-        if self == ControlType.normal:
+        if self in [ControlType.normal, ControlType.bfl_kontext]:
             return 64
         elif self == ControlType.bfl_control:
             return 128
@@ -764,9 +765,15 @@ def predict_noise(
         current_step: int,
         total_step: int,
     ):
+        origin_latents_shape = latents.shape
         if self.control_type != ControlType.normal:
             controlnet_param = controlnet_params[0]
-            latents = torch.cat((latents, controlnet_param.image * controlnet_param.scale), dim=1)
+            if self.control_type == ControlType.bfl_kontext:
+                latents = torch.cat((latents, controlnet_param.image * controlnet_param.scale), dim=2)
+                image_ids = image_ids.repeat(1, 2, 1)
+                image_ids[:, image_ids.shape[1] // 2 :, 0] += 1
+            else:
+                latents = torch.cat((latents, controlnet_param.image * controlnet_param.scale), dim=1)
             latents = latents.to(self.dtype)
             controlnet_params = []
 
@@ -797,6 +804,8 @@ def predict_noise(
             controlnet_double_block_output=double_block_output,
             controlnet_single_block_output=single_block_output,
         )
+        if self.control_type == ControlType.bfl_kontext:
+            noise_pred = noise_pred[:, :, : origin_latents_shape[2], : origin_latents_shape[3]]
         return noise_pred
 
     def prepare_latents(
diff --git a/pyproject.toml b/pyproject.toml
@@ -30,7 +30,8 @@ dependencies = [
     "pillow",
     "imageio[ffmpeg]",
     "yunchang ; sys_platform == 'linux'",
-    "onnxruntime"
+    "onnxruntime",
+    "opencv-python"
 ]
 
 [project.optional-dependencies]
diff --git a/tests/data/expect/flux/flux_bfl_kontext.png b/tests/data/expect/flux/flux_bfl_kontext.png
diff --git a/tests/data/input/flux_kontext_input.png b/tests/data/input/flux_kontext_input.png
diff --git a/tests/test_pipelines/test_flux_bfl_image.py b/tests/test_pipelines/test_flux_bfl_image.py
@@ -108,5 +108,26 @@ def test_fill_txt2img(self):
         self.assertImageEqualAndSaveFailed(image, "flux/flux_bfl_fill.png", threshold=0.99)
 
 
+class TestFLUXBFLKontextImage(ImageTestCase):  # Image test for FluxImagePipeline built with ControlType.bfl_kontext.
+    @classmethod
+    def setUpClass(cls):  # Build the pipeline once on the class (cls.pipe) so test methods reuse it.
+        kontext_model_path = fetch_model(
+            "black-forest-labs/FLUX.1-Kontext-dev", revision="master", path="flux1-kontext-dev.safetensors"
+        )
+        cls.pipe = FluxImagePipeline.from_pretrained(kontext_model_path, control_type=ControlType.bfl_kontext).eval()
+    
+    def test_kontext_image(self):  # Run the pipeline on flux_kontext_input.png and check the result against flux/flux_bfl_kontext.png.
+        image = self.pipe(
+            prompt="Make the wall color to red",
+            height=1024,
+            width=1024,
+            controlnet_params=ControlNetParams(image=self.get_input_image("flux_kontext_input.png")),  # scale is not passed, so the dataclass default applies
+            cfg_scale=1.0,
+            seed=42,  # fixed seed; presumably keeps the output reproducible for the reference comparison - TODO confirm
+            num_inference_steps=30,
+        )
+        self.assertImageEqualAndSaveFailed(image, "flux/flux_bfl_kontext.png", threshold=0.99)  # NOTE(review): threshold semantics are defined in ImageTestCase, not visible here
+
+
 if __name__ == "__main__":
     unittest.main()

Original file line number	Diff line number	Diff line change
`@@ -30,7 +30,8 @@ dependencies = [`
`30`	`30`	`"pillow",`
`31`	`31`	`"imageio[ffmpeg]",`
`32`	`32`	`"yunchang ; sys_platform == 'linux'",`
`33`		`- "onnxruntime"`
	`33`	`+ "onnxruntime",`
	`34`	`+ "opencv-python"`
`34`	`35`	`]`
`35`	`36`
`36`	`37`	`[project.optional-dependencies]`