Skip to content

Commit ff182ad

Browse files
sayakpaul and a-r-r-o-w authored
[CI] add a big GPU marker to run memory-intensive tests separately on CI (#9691)
* add a marker for big gpu tests
* update
* trigger on PRs temporarily.
* onnx
* fix
* total memory
* fixes
* reduce memory threshold.
* bigger gpu
* empty
* g6e
* Apply suggestions from code review
* address comments.
* fix
* fix
* fix
* fix
* fix
* okay
* further reduce.
* updates
* remove
* updates
* updates
* updates
* updates
* fixes
* fixes
* updates.
* fix
* workflow fixes.

---------

Co-authored-by: Aryan <aryan@huggingface.co>
1 parent 4adf6af commit ff182ad

File tree

9 files changed

+181
-123
lines changed

9 files changed

+181
-123
lines changed

.github/workflows/nightly_tests.yml

+56
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,62 @@ jobs:
180180
pip install slack_sdk tabulate
181181
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
182182
183+
run_big_gpu_torch_tests:
184+
name: Torch tests on big GPU
185+
strategy:
186+
fail-fast: false
187+
max-parallel: 2
188+
runs-on:
189+
group: aws-g6e-xlarge-plus
190+
container:
191+
image: diffusers/diffusers-pytorch-cuda
192+
options: --shm-size "16gb" --ipc host --gpus 0
193+
steps:
194+
- name: Checkout diffusers
195+
uses: actions/checkout@v3
196+
with:
197+
fetch-depth: 2
198+
- name: NVIDIA-SMI
199+
run: nvidia-smi
200+
- name: Install dependencies
201+
run: |
202+
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
203+
python -m uv pip install -e [quality,test]
204+
python -m uv pip install peft@git+https://github.com/huggingface/peft.git
205+
pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
206+
python -m uv pip install pytest-reportlog
207+
- name: Environment
208+
run: |
209+
python utils/print_env.py
210+
- name: Selected Torch CUDA Test on big GPU
211+
env:
212+
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
213+
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
214+
CUBLAS_WORKSPACE_CONFIG: :16:8
215+
BIG_GPU_MEMORY: 40
216+
run: |
217+
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
218+
-m "big_gpu_with_torch_cuda" \
219+
--make-reports=tests_big_gpu_torch_cuda \
220+
--report-log=tests_big_gpu_torch_cuda.log \
221+
tests/
222+
- name: Failure short reports
223+
if: ${{ failure() }}
224+
run: |
225+
cat reports/tests_big_gpu_torch_cuda_stats.txt
226+
cat reports/tests_big_gpu_torch_cuda_failures_short.txt
227+
- name: Test suite reports artifacts
228+
if: ${{ always() }}
229+
uses: actions/upload-artifact@v4
230+
with:
231+
name: torch_cuda_big_gpu_test_reports
232+
path: reports
233+
- name: Generate Report and Notify Channel
234+
if: always()
235+
run: |
236+
pip install slack_sdk tabulate
237+
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
238+
183239
run_flax_tpu_tests:
184240
name: Nightly Flax TPU Tests
185241
runs-on: docker-tpu

src/diffusers/utils/testing_utils.py

+21
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
) > version.parse("4.33")
5858

5959
USE_PEFT_BACKEND = _required_peft_version and _required_transformers_version
60+
BIG_GPU_MEMORY = int(os.getenv("BIG_GPU_MEMORY", 40))
6061

6162
if is_torch_available():
6263
import torch
@@ -310,6 +311,26 @@ def require_torch_accelerator_with_fp64(test_case):
310311
)
311312

312313

314+
def require_big_gpu_with_torch_cuda(test_case):
315+
"""
316+
Decorator marking a test that requires a bigger GPU (at least `BIG_GPU_MEMORY` GB of total memory, 40 by default) for execution. Some example pipelines: Flux, SD3, Cog,
317+
etc.
318+
"""
319+
if not is_torch_available():
320+
return unittest.skip("test requires PyTorch")(test_case)
321+
322+
import torch
323+
324+
if not torch.cuda.is_available():
325+
return unittest.skip("test requires PyTorch CUDA")(test_case)
326+
327+
device_properties = torch.cuda.get_device_properties(0)
328+
total_memory = device_properties.total_memory / (1024**3)
329+
return unittest.skipUnless(
330+
total_memory >= BIG_GPU_MEMORY, f"test requires a GPU with at least {BIG_GPU_MEMORY} GB memory"
331+
)(test_case)
332+
333+
313334
def require_torch_accelerator_with_training(test_case):
314335
"""Decorator marking a test that requires an accelerator with support for training."""
315336
return unittest.skipUnless(

tests/pipelines/controlnet_flux/test_controlnet_flux.py

+28-10
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@
1717
import unittest
1818

1919
import numpy as np
20+
import pytest
2021
import torch
22+
from huggingface_hub import hf_hub_download
2123
from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
2224

2325
from diffusers import (
@@ -30,7 +32,8 @@
3032
from diffusers.utils import load_image
3133
from diffusers.utils.testing_utils import (
3234
enable_full_determinism,
33-
require_torch_gpu,
35+
numpy_cosine_similarity_distance,
36+
require_big_gpu_with_torch_cuda,
3437
slow,
3538
torch_device,
3639
)
@@ -180,7 +183,8 @@ def test_xformers_attention_forwardGenerator_pass(self):
180183

181184

182185
@slow
183-
@require_torch_gpu
186+
@require_big_gpu_with_torch_cuda
187+
@pytest.mark.big_gpu_with_torch_cuda
184188
class FluxControlNetPipelineSlowTests(unittest.TestCase):
185189
pipeline_class = FluxControlNetPipeline
186190

@@ -199,35 +203,49 @@ def test_canny(self):
199203
"InstantX/FLUX.1-dev-Controlnet-Canny-alpha", torch_dtype=torch.bfloat16
200204
)
201205
pipe = FluxControlNetPipeline.from_pretrained(
202-
"black-forest-labs/FLUX.1-dev", controlnet=controlnet, torch_dtype=torch.bfloat16
206+
"black-forest-labs/FLUX.1-dev",
207+
text_encoder=None,
208+
text_encoder_2=None,
209+
controlnet=controlnet,
210+
torch_dtype=torch.bfloat16,
203211
)
204212
pipe.enable_model_cpu_offload()
205213
pipe.set_progress_bar_config(disable=None)
206214

207215
generator = torch.Generator(device="cpu").manual_seed(0)
208-
prompt = "A girl in city, 25 years old, cool, futuristic"
209216
control_image = load_image(
210217
"https://huggingface.co/InstantX/FLUX.1-dev-Controlnet-Canny-alpha/resolve/main/canny.jpg"
218+
).resize((512, 512))
219+
220+
prompt_embeds = torch.load(
221+
hf_hub_download(repo_id="diffusers/test-slices", repo_type="dataset", filename="flux/prompt_embeds.pt")
222+
)
223+
pooled_prompt_embeds = torch.load(
224+
hf_hub_download(
225+
repo_id="diffusers/test-slices", repo_type="dataset", filename="flux/pooled_prompt_embeds.pt"
226+
)
211227
)
212228

213229
output = pipe(
214-
prompt,
230+
prompt_embeds=prompt_embeds,
231+
pooled_prompt_embeds=pooled_prompt_embeds,
215232
control_image=control_image,
216233
controlnet_conditioning_scale=0.6,
217234
num_inference_steps=2,
218235
guidance_scale=3.5,
236+
max_sequence_length=256,
219237
output_type="np",
238+
height=512,
239+
width=512,
220240
generator=generator,
221241
)
222242

223243
image = output.images[0]
224244

225-
assert image.shape == (1024, 1024, 3)
245+
assert image.shape == (512, 512, 3)
226246

227247
original_image = image[-3:, -3:, -1].flatten()
228248

229-
expected_image = np.array(
230-
[0.33007812, 0.33984375, 0.33984375, 0.328125, 0.34179688, 0.33984375, 0.30859375, 0.3203125, 0.3203125]
231-
)
249+
expected_image = np.array([0.2734, 0.2852, 0.2852, 0.2734, 0.2754, 0.2891, 0.2617, 0.2637, 0.2773])
232250

233-
assert np.abs(original_image.flatten() - expected_image).max() < 1e-2
251+
assert numpy_cosine_similarity_distance(original_image.flatten(), expected_image) < 1e-2

tests/pipelines/controlnet_flux/test_controlnet_flux_img2img.py

-71
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import gc
21
import unittest
32

43
import numpy as np
@@ -13,9 +12,6 @@
1312
FluxTransformer2DModel,
1413
)
1514
from diffusers.utils.testing_utils import (
16-
numpy_cosine_similarity_distance,
17-
require_torch_gpu,
18-
slow,
1915
torch_device,
2016
)
2117

@@ -222,70 +218,3 @@ def test_fused_qkv_projections(self):
222218
assert np.allclose(
223219
original_image_slice, image_slice_disabled, atol=1e-2, rtol=1e-2
224220
), "Original outputs should match when fused QKV projections are disabled."
225-
226-
227-
@slow
228-
@require_torch_gpu
229-
class FluxControlNetImg2ImgPipelineSlowTests(unittest.TestCase):
230-
pipeline_class = FluxControlNetImg2ImgPipeline
231-
repo_id = "black-forest-labs/FLUX.1-schnell"
232-
233-
def setUp(self):
234-
super().setUp()
235-
gc.collect()
236-
torch.cuda.empty_cache()
237-
238-
def tearDown(self):
239-
super().tearDown()
240-
gc.collect()
241-
torch.cuda.empty_cache()
242-
243-
def get_inputs(self, device, seed=0):
244-
if str(device).startswith("mps"):
245-
generator = torch.manual_seed(seed)
246-
else:
247-
generator = torch.Generator(device="cpu").manual_seed(seed)
248-
249-
image = torch.randn(1, 3, 64, 64).to(device)
250-
control_image = torch.randn(1, 3, 64, 64).to(device)
251-
252-
return {
253-
"prompt": "A photo of a cat",
254-
"image": image,
255-
"control_image": control_image,
256-
"num_inference_steps": 2,
257-
"guidance_scale": 5.0,
258-
"controlnet_conditioning_scale": 1.0,
259-
"strength": 0.8,
260-
"output_type": "np",
261-
"generator": generator,
262-
}
263-
264-
@unittest.skip("We cannot run inference on this model with the current CI hardware")
265-
def test_flux_controlnet_img2img_inference(self):
266-
pipe = self.pipeline_class.from_pretrained(self.repo_id, torch_dtype=torch.bfloat16)
267-
pipe.enable_model_cpu_offload()
268-
269-
inputs = self.get_inputs(torch_device)
270-
271-
image = pipe(**inputs).images[0]
272-
image_slice = image[0, :10, :10]
273-
expected_slice = np.array(
274-
[
275-
[0.36132812, 0.30004883, 0.25830078],
276-
[0.36669922, 0.31103516, 0.23754883],
277-
[0.34814453, 0.29248047, 0.23583984],
278-
[0.35791016, 0.30981445, 0.23999023],
279-
[0.36328125, 0.31274414, 0.2607422],
280-
[0.37304688, 0.32177734, 0.26171875],
281-
[0.3671875, 0.31933594, 0.25756836],
282-
[0.36035156, 0.31103516, 0.2578125],
283-
[0.3857422, 0.33789062, 0.27563477],
284-
[0.3701172, 0.31982422, 0.265625],
285-
],
286-
dtype=np.float32,
287-
)
288-
289-
max_diff = numpy_cosine_similarity_distance(expected_slice.flatten(), image_slice.flatten())
290-
291-
assert max_diff < 1e-4

tests/pipelines/controlnet_sd3/test_controlnet_sd3.py

+14-21
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import unittest
1818

1919
import numpy as np
20+
import pytest
2021
import torch
2122
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
2223

@@ -30,7 +31,8 @@
3031
from diffusers.utils import load_image
3132
from diffusers.utils.testing_utils import (
3233
enable_full_determinism,
33-
require_torch_gpu,
34+
numpy_cosine_similarity_distance,
35+
require_big_gpu_with_torch_cuda,
3436
slow,
3537
torch_device,
3638
)
@@ -195,7 +197,8 @@ def test_xformers_attention_forwardGenerator_pass(self):
195197

196198

197199
@slow
198-
@require_torch_gpu
200+
@require_big_gpu_with_torch_cuda
201+
@pytest.mark.big_gpu_with_torch_cuda
199202
class StableDiffusion3ControlNetPipelineSlowTests(unittest.TestCase):
200203
pipeline_class = StableDiffusion3ControlNetPipeline
201204

@@ -238,11 +241,9 @@ def test_canny(self):
238241

239242
original_image = image[-3:, -3:, -1].flatten()
240243

241-
expected_image = np.array(
242-
[0.20947266, 0.1574707, 0.19897461, 0.15063477, 0.1418457, 0.17285156, 0.14160156, 0.13989258, 0.30810547]
243-
)
244+
expected_image = np.array([0.7314, 0.7075, 0.6611, 0.7539, 0.7563, 0.6650, 0.6123, 0.7275, 0.7222])
244245

245-
assert np.abs(original_image.flatten() - expected_image).max() < 1e-2
246+
assert numpy_cosine_similarity_distance(original_image.flatten(), expected_image) < 1e-2
246247

247248
def test_pose(self):
248249
controlnet = SD3ControlNetModel.from_pretrained("InstantX/SD3-Controlnet-Pose", torch_dtype=torch.float16)
@@ -272,15 +273,12 @@ def test_pose(self):
272273
assert image.shape == (1024, 1024, 3)
273274

274275
original_image = image[-3:, -3:, -1].flatten()
276+
expected_image = np.array([0.9048, 0.8740, 0.8936, 0.8516, 0.8799, 0.9360, 0.8379, 0.8408, 0.8652])
275277

276-
expected_image = np.array(
277-
[0.8671875, 0.86621094, 0.91015625, 0.8491211, 0.87890625, 0.9140625, 0.8300781, 0.8334961, 0.8623047]
278-
)
279-
280-
assert np.abs(original_image.flatten() - expected_image).max() < 1e-2
278+
assert numpy_cosine_similarity_distance(original_image.flatten(), expected_image) < 1e-2
281279

282280
def test_tile(self):
283-
controlnet = SD3ControlNetModel.from_pretrained("InstantX//SD3-Controlnet-Tile", torch_dtype=torch.float16)
281+
controlnet = SD3ControlNetModel.from_pretrained("InstantX/SD3-Controlnet-Tile", torch_dtype=torch.float16)
284282
pipe = StableDiffusion3ControlNetPipeline.from_pretrained(
285283
"stabilityai/stable-diffusion-3-medium-diffusers", controlnet=controlnet, torch_dtype=torch.float16
286284
)
@@ -307,12 +305,9 @@ def test_tile(self):
307305
assert image.shape == (1024, 1024, 3)
308306

309307
original_image = image[-3:, -3:, -1].flatten()
308+
expected_image = np.array([0.6699, 0.6836, 0.6226, 0.6572, 0.7310, 0.6646, 0.6650, 0.6694, 0.6011])
310309

311-
expected_image = np.array(
312-
[0.6982422, 0.7011719, 0.65771484, 0.6904297, 0.7416992, 0.6904297, 0.6977539, 0.7080078, 0.6386719]
313-
)
314-
315-
assert np.abs(original_image.flatten() - expected_image).max() < 1e-2
310+
assert numpy_cosine_similarity_distance(original_image.flatten(), expected_image) < 1e-2
316311

317312
def test_multi_controlnet(self):
318313
controlnet = SD3ControlNetModel.from_pretrained("InstantX/SD3-Controlnet-Canny", torch_dtype=torch.float16)
@@ -344,8 +339,6 @@ def test_multi_controlnet(self):
344339
assert image.shape == (1024, 1024, 3)
345340

346341
original_image = image[-3:, -3:, -1].flatten()
347-
expected_image = np.array(
348-
[0.7451172, 0.7416992, 0.7158203, 0.7792969, 0.7607422, 0.7089844, 0.6855469, 0.71777344, 0.7314453]
349-
)
342+
expected_image = np.array([0.7207, 0.7041, 0.6543, 0.7500, 0.7490, 0.6592, 0.6001, 0.7168, 0.7231])
350343

351-
assert np.abs(original_image.flatten() - expected_image).max() < 1e-2
344+
assert numpy_cosine_similarity_distance(original_image.flatten(), expected_image) < 1e-2

0 commit comments

Comments
 (0)