17
17
import unittest
18
18
19
19
import numpy as np
20
+ import pytest
20
21
import torch
21
22
from transformers import AutoTokenizer , CLIPTextConfig , CLIPTextModelWithProjection , CLIPTokenizer , T5EncoderModel
22
23
30
31
from diffusers .utils import load_image
31
32
from diffusers .utils .testing_utils import (
32
33
enable_full_determinism ,
33
- require_torch_gpu ,
34
+ numpy_cosine_similarity_distance ,
35
+ require_big_gpu_with_torch_cuda ,
34
36
slow ,
35
37
torch_device ,
36
38
)
@@ -195,7 +197,8 @@ def test_xformers_attention_forwardGenerator_pass(self):
195
197
196
198
197
199
@slow
198
- @require_torch_gpu
200
+ @require_big_gpu_with_torch_cuda
201
+ @pytest .mark .big_gpu_with_torch_cuda
199
202
class StableDiffusion3ControlNetPipelineSlowTests (unittest .TestCase ):
200
203
pipeline_class = StableDiffusion3ControlNetPipeline
201
204
@@ -238,11 +241,9 @@ def test_canny(self):
238
241
239
242
original_image = image [- 3 :, - 3 :, - 1 ].flatten ()
240
243
241
- expected_image = np .array (
242
- [0.20947266 , 0.1574707 , 0.19897461 , 0.15063477 , 0.1418457 , 0.17285156 , 0.14160156 , 0.13989258 , 0.30810547 ]
243
- )
244
+ expected_image = np .array ([0.7314 , 0.7075 , 0.6611 , 0.7539 , 0.7563 , 0.6650 , 0.6123 , 0.7275 , 0.7222 ])
244
245
245
- assert np . abs (original_image .flatten () - expected_image ). max ( ) < 1e-2
246
+ assert numpy_cosine_similarity_distance (original_image .flatten (), expected_image ) < 1e-2
246
247
247
248
def test_pose (self ):
248
249
controlnet = SD3ControlNetModel .from_pretrained ("InstantX/SD3-Controlnet-Pose" , torch_dtype = torch .float16 )
@@ -272,15 +273,12 @@ def test_pose(self):
272
273
assert image .shape == (1024 , 1024 , 3 )
273
274
274
275
original_image = image [- 3 :, - 3 :, - 1 ].flatten ()
276
+ expected_image = np .array ([0.9048 , 0.8740 , 0.8936 , 0.8516 , 0.8799 , 0.9360 , 0.8379 , 0.8408 , 0.8652 ])
275
277
276
- expected_image = np .array (
277
- [0.8671875 , 0.86621094 , 0.91015625 , 0.8491211 , 0.87890625 , 0.9140625 , 0.8300781 , 0.8334961 , 0.8623047 ]
278
- )
279
-
280
- assert np .abs (original_image .flatten () - expected_image ).max () < 1e-2
278
+ assert numpy_cosine_similarity_distance (original_image .flatten (), expected_image ) < 1e-2
281
279
282
280
def test_tile (self ):
283
- controlnet = SD3ControlNetModel .from_pretrained ("InstantX// SD3-Controlnet-Tile" , torch_dtype = torch .float16 )
281
+ controlnet = SD3ControlNetModel .from_pretrained ("InstantX/SD3-Controlnet-Tile" , torch_dtype = torch .float16 )
284
282
pipe = StableDiffusion3ControlNetPipeline .from_pretrained (
285
283
"stabilityai/stable-diffusion-3-medium-diffusers" , controlnet = controlnet , torch_dtype = torch .float16
286
284
)
@@ -307,12 +305,9 @@ def test_tile(self):
307
305
assert image .shape == (1024 , 1024 , 3 )
308
306
309
307
original_image = image [- 3 :, - 3 :, - 1 ].flatten ()
308
+ expected_image = np .array ([0.6699 , 0.6836 , 0.6226 , 0.6572 , 0.7310 , 0.6646 , 0.6650 , 0.6694 , 0.6011 ])
310
309
311
- expected_image = np .array (
312
- [0.6982422 , 0.7011719 , 0.65771484 , 0.6904297 , 0.7416992 , 0.6904297 , 0.6977539 , 0.7080078 , 0.6386719 ]
313
- )
314
-
315
- assert np .abs (original_image .flatten () - expected_image ).max () < 1e-2
310
+ assert numpy_cosine_similarity_distance (original_image .flatten (), expected_image ) < 1e-2
316
311
317
312
def test_multi_controlnet (self ):
318
313
controlnet = SD3ControlNetModel .from_pretrained ("InstantX/SD3-Controlnet-Canny" , torch_dtype = torch .float16 )
@@ -344,8 +339,6 @@ def test_multi_controlnet(self):
344
339
assert image .shape == (1024 , 1024 , 3 )
345
340
346
341
original_image = image [- 3 :, - 3 :, - 1 ].flatten ()
347
- expected_image = np .array (
348
- [0.7451172 , 0.7416992 , 0.7158203 , 0.7792969 , 0.7607422 , 0.7089844 , 0.6855469 , 0.71777344 , 0.7314453 ]
349
- )
342
+ expected_image = np .array ([0.7207 , 0.7041 , 0.6543 , 0.7500 , 0.7490 , 0.6592 , 0.6001 , 0.7168 , 0.7231 ])
350
343
351
- assert np . abs (original_image .flatten () - expected_image ). max ( ) < 1e-2
344
+ assert numpy_cosine_similarity_distance (original_image .flatten (), expected_image ) < 1e-2
0 commit comments