Skip to content

Commit bf8aec3

Browse files
qianyu-dlut
authored and committed
fixbug
1 parent 6b62802 commit bf8aec3

File tree

2 files changed

+21
-35
lines changed

2 files changed

+21
-35
lines changed

examples/community/README.md

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5566,17 +5566,21 @@ import torch
55665566
from diffusers import VQModel, DiffusionPipeline
55675567
from transformers import AutoTokenizer
55685568

5569+
vqvae = VQModel.from_pretrained("Alpha-VLLM/Lumina-DiMOO", subfolder="vqvae").to(device='cuda', dtype=torch.bfloat16)
5570+
tokenizer = AutoTokenizer.from_pretrained("Alpha-VLLM/Lumina-DiMOO", trust_remote_code=True)
5571+
55695572
pipe = DiffusionPipeline.from_pretrained(
55705573
"Alpha-VLLM/Lumina-DiMOO",
5574+
vqvae=vqvae,
5575+
tokenizer=tokenizer,
55715576
torch_dtype=torch.bfloat16,
5572-
device_map="auto",
55735577
custom_pipeline="lumina_dimoo",
55745578
)
55755579
pipe.to("cuda")
55765580

55775581
prompt = '''A striking photograph of a glass of orange juice on a wooden kitchen table, capturing a playful moment. The orange juice splashes out of the glass and forms the word \"Smile\" in a whimsical, swirling script just above the glass. The background is softly blurred, revealing a cozy, homely kitchen with warm lighting and a sense of comfort.'''
55785582

5579-
out = pipe(
5583+
img = pipe(
55805584
prompt=prompt,
55815585
task="text_to_image",
55825586
height=768,
@@ -5587,7 +5591,7 @@ out = pipe(
55875591
cache_ratio=0.9,
55885592
warmup_ratio=0.3,
55895593
refresh_interval=5
5590-
)
5594+
).images[0]
55915595

55925596
img.save("t2i_test_output.png")
55935597
```
@@ -5604,11 +5608,14 @@ from diffusers import VQModel, DiffusionPipeline
56045608
from transformers import AutoTokenizer
56055609
from diffusers.utils import load_image
56065610

5611+
vqvae = VQModel.from_pretrained("Alpha-VLLM/Lumina-DiMOO", subfolder="vqvae").to(device='cuda', dtype=torch.bfloat16)
5612+
tokenizer = AutoTokenizer.from_pretrained("Alpha-VLLM/Lumina-DiMOO", trust_remote_code=True)
56075613

56085614
pipe = DiffusionPipeline.from_pretrained(
56095615
"Alpha-VLLM/Lumina-DiMOO",
5616+
vqvae=vqvae,
5617+
tokenizer=tokenizer,
56105618
torch_dtype=torch.bfloat16,
5611-
device_map="auto",
56125619
custom_pipeline="lumina_dimoo",
56135620
)
56145621
pipe.to("cuda")
@@ -5619,8 +5626,7 @@ input_image = load_image(
56195626

56205627
prompt = "A functional wooden printer stand.Nestled next to a brick wall in a bustling city street, it stands firm as pedestrians hustle by, illuminated by the warm glow of vintage street lamps."
56215628

5622-
5623-
out = pipe(
5629+
img = pipe(
56245630
prompt=prompt,
56255631
image=input_image,
56265632
edit_type="depth_control",
@@ -5629,9 +5635,10 @@ out = pipe(
56295635
cfg_scale=2.5,
56305636
cfg_img=4.0,
56315637
task="image_to_image"
5632-
)
5638+
).images[0]
56335639

56345640
img.save("i2i_test_output.png")
5641+
56355642
```
56365643

56375644

@@ -5742,11 +5749,14 @@ from diffusers import VQModel, DiffusionPipeline
57425749
from transformers import AutoTokenizer
57435750
from diffusers.utils import load_image
57445751

5752+
vqvae = VQModel.from_pretrained("Alpha-VLLM/Lumina-DiMOO", subfolder="vqvae").to(device='cuda', dtype=torch.bfloat16)
5753+
tokenizer = AutoTokenizer.from_pretrained("Alpha-VLLM/Lumina-DiMOO", trust_remote_code=True)
57455754

57465755
pipe = DiffusionPipeline.from_pretrained(
57475756
"Alpha-VLLM/Lumina-DiMOO",
5757+
vqvae=vqvae,
5758+
tokenizer=tokenizer,
57485759
torch_dtype=torch.bfloat16,
5749-
device_map="auto",
57505760
custom_pipeline="lumina_dimoo",
57515761
)
57525762
pipe.to("cuda")
@@ -5768,5 +5778,7 @@ out = pipe(
57685778
cfg_scale=0.0,
57695779
)
57705780

5771-
img.save("mmu_answer.txt")
5781+
text = getattr(out, "text", out)
5782+
with open("mmu_answer.txt", "w", encoding="utf-8") as f:
5783+
f.write(text.strip() + "\n")
57725784
```

examples/community/lumina_dimoo.py

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1960,8 +1960,6 @@ class LuminaDiMOOPipeline(DiffusionPipeline):
19601960
An `AutoTokenizer` to tokenize text prompts.
19611961
"""
19621962

1963-
model_cpu_offload_seq = "llm->vqvae"
1964-
19651963
def __init__(
19661964
self,
19671965
vqvae: VQModel,
@@ -2349,7 +2347,6 @@ def generate_text_understanding(
23492347

23502348

23512349
@torch.no_grad()
2352-
@replace_example_docstring(EXAMPLE_DOC_STRING)
23532350
def _image_to_image(
23542351
self,
23552352
prompt: str,
@@ -2362,29 +2359,6 @@ def _image_to_image(
23622359
cfg_img: float = 4.0,
23632360
output_type: Optional[str] = "pil",
23642361
):
2365-
"""
2366-
The call function to the pipeline for generation.
2367-
2368-
Args:
2369-
prompt (`str`):
2370-
The prompt to guide image generation.
2371-
image (`PIL.Image.Image` or `str`):
2372-
The input image for image-to-image generation. Can be a PIL Image or a path to an image file.
2373-
ref_image (`PIL.Image.Image` or `str`, *optional*):
2374-
The reference image for style transfer. Can be a PIL Image or a path to an image file.
2375-
edit_type (`str`, *optional*, defaults to `"canny_pred"`):
2376-
The type of image-to-image editing to perform.
2377-
num_inference_steps (`int`, *optional*, defaults to 64):
2378-
The number of denoising steps. More steps usually lead to a higher quality image at the expense of
2379-
slower inference.
2380-
temperature (`float`, *optional*, defaults to 1.0):
2381-
The temperature for sampling. A higher temperature introduces more randomness. 0 for deterministic.
2382-
cfg_scale (`float`, *optional*, defaults to 2.5):
2383-
Classifier-Free Guidance scale for text conditioning.
2384-
cfg_img (`float`, *optional*, defaults to 4.0):
2385-
Classifier-Free Guidance scale for image conditioning.
2386-
2387-
"""
23882362

23892363
if isinstance(prompt, list):
23902364
raise ValueError("Batching is not supported for this pipeline.")

0 commit comments

Comments
 (0)