Spaces:
Running
on
Zero
Running
on
Zero
adjustable cfg
Browse files
app.py
CHANGED
|
@@ -25,6 +25,8 @@ pipeline = NextStepPipeline(tokenizer=tokenizer, model=model).to(device=device,
|
|
| 25 |
MAX_SEED = np.iinfo(np.int16).max
|
| 26 |
DEFAULT_POSITIVE_PROMPT = None
|
| 27 |
DEFAULT_NEGATIVE_PROMPT = None
|
|
|
|
|
|
|
| 28 |
|
| 29 |
def _ensure_pil(x):
|
| 30 |
"""Ensure returned image is a PIL.Image.Image."""
|
|
@@ -36,11 +38,12 @@ def _ensure_pil(x):
|
|
| 36 |
if isinstance(x, np.ndarray):
|
| 37 |
if x.dtype != np.uint8:
|
| 38 |
x = (x * 255.0).clip(0, 255).astype(np.uint8)
|
| 39 |
-
if x.ndim == 3 and x.shape[0] in (1,3,4): # CHW -> HWC
|
| 40 |
x = np.moveaxis(x, 0, -1)
|
| 41 |
return Image.fromarray(x)
|
| 42 |
raise TypeError("Unsupported image type returned by pipeline.")
|
| 43 |
|
|
|
|
| 44 |
@spaces.GPU(duration=300)
|
| 45 |
def infer(
|
| 46 |
prompt=None,
|
|
@@ -48,6 +51,7 @@ def infer(
|
|
| 48 |
width=512,
|
| 49 |
height=512,
|
| 50 |
num_inference_steps=28,
|
|
|
|
| 51 |
positive_prompt=DEFAULT_POSITIVE_PROMPT,
|
| 52 |
negative_prompt=DEFAULT_NEGATIVE_PROMPT,
|
| 53 |
progress=gr.Progress(track_tqdm=True),
|
|
@@ -64,7 +68,7 @@ def infer(
|
|
| 64 |
num_images_per_caption=1,
|
| 65 |
positive_prompt=positive_prompt,
|
| 66 |
negative_prompt=negative_prompt,
|
| 67 |
-
cfg=
|
| 68 |
cfg_img=1.0,
|
| 69 |
cfg_schedule="constant",
|
| 70 |
use_norm=False,
|
|
@@ -76,6 +80,7 @@ def infer(
|
|
| 76 |
|
| 77 |
return _ensure_pil(imgs[0]) # Return raw output exactly as generated
|
| 78 |
|
|
|
|
| 79 |
css = """
|
| 80 |
#col-container {
|
| 81 |
margin: 0 auto;
|
|
@@ -85,7 +90,7 @@ css = """
|
|
| 85 |
|
| 86 |
with gr.Blocks(css=css) as demo:
|
| 87 |
with gr.Column(elem_id="col-container"):
|
| 88 |
-
gr.Markdown("# NextStep-1-Large —
|
| 89 |
|
| 90 |
with gr.Row():
|
| 91 |
prompt = gr.Text(
|
|
@@ -144,6 +149,14 @@ with gr.Blocks(css=css) as demo:
|
|
| 144 |
step=64,
|
| 145 |
value=512,
|
| 146 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
|
| 148 |
with gr.Row():
|
| 149 |
result_1 = gr.Image(
|
|
@@ -154,25 +167,66 @@ with gr.Blocks(css=css) as demo:
|
|
| 154 |
format="png",
|
| 155 |
)
|
| 156 |
|
| 157 |
-
# Click & Fill Examples (all <=512px)
|
| 158 |
examples = [
|
| 159 |
[
|
| 160 |
-
"
|
| 161 |
-
|
| 162 |
-
"photorealistic,
|
| 163 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
],
|
| 165 |
[
|
| 166 |
-
"
|
| 167 |
-
|
| 168 |
-
"
|
| 169 |
-
"washed out,
|
| 170 |
],
|
| 171 |
[
|
| 172 |
-
"
|
| 173 |
-
|
| 174 |
-
"
|
| 175 |
-
"
|
| 176 |
],
|
| 177 |
]
|
| 178 |
|
|
@@ -184,6 +238,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 184 |
width,
|
| 185 |
height,
|
| 186 |
num_inference_steps,
|
|
|
|
| 187 |
positive_prompt,
|
| 188 |
negative_prompt,
|
| 189 |
],
|
|
@@ -202,6 +257,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 202 |
width,
|
| 203 |
height,
|
| 204 |
num_inference_steps,
|
|
|
|
| 205 |
positive_prompt,
|
| 206 |
negative_prompt,
|
| 207 |
],
|
|
|
|
| 25 |
MAX_SEED = np.iinfo(np.int16).max
|
| 26 |
DEFAULT_POSITIVE_PROMPT = None
|
| 27 |
DEFAULT_NEGATIVE_PROMPT = None
|
| 28 |
+
DEFAULT_CFG = 7.5
|
| 29 |
+
|
| 30 |
|
| 31 |
def _ensure_pil(x):
|
| 32 |
"""Ensure returned image is a PIL.Image.Image."""
|
|
|
|
| 38 |
if isinstance(x, np.ndarray):
|
| 39 |
if x.dtype != np.uint8:
|
| 40 |
x = (x * 255.0).clip(0, 255).astype(np.uint8)
|
| 41 |
+
if x.ndim == 3 and x.shape[0] in (1, 3, 4): # CHW -> HWC
|
| 42 |
x = np.moveaxis(x, 0, -1)
|
| 43 |
return Image.fromarray(x)
|
| 44 |
raise TypeError("Unsupported image type returned by pipeline.")
|
| 45 |
|
| 46 |
+
|
| 47 |
@spaces.GPU(duration=300)
|
| 48 |
def infer(
|
| 49 |
prompt=None,
|
|
|
|
| 51 |
width=512,
|
| 52 |
height=512,
|
| 53 |
num_inference_steps=28,
|
| 54 |
+
cfg=DEFAULT_CFG,
|
| 55 |
positive_prompt=DEFAULT_POSITIVE_PROMPT,
|
| 56 |
negative_prompt=DEFAULT_NEGATIVE_PROMPT,
|
| 57 |
progress=gr.Progress(track_tqdm=True),
|
|
|
|
| 68 |
num_images_per_caption=1,
|
| 69 |
positive_prompt=positive_prompt,
|
| 70 |
negative_prompt=negative_prompt,
|
| 71 |
+
cfg=float(cfg),
|
| 72 |
cfg_img=1.0,
|
| 73 |
cfg_schedule="constant",
|
| 74 |
use_norm=False,
|
|
|
|
| 80 |
|
| 81 |
return _ensure_pil(imgs[0]) # Return raw output exactly as generated
|
| 82 |
|
| 83 |
+
|
| 84 |
css = """
|
| 85 |
#col-container {
|
| 86 |
margin: 0 auto;
|
|
|
|
| 90 |
|
| 91 |
with gr.Blocks(css=css) as demo:
|
| 92 |
with gr.Column(elem_id="col-container"):
|
| 93 |
+
gr.Markdown("# NextStep-1-Large — Image generation")
|
| 94 |
|
| 95 |
with gr.Row():
|
| 96 |
prompt = gr.Text(
|
|
|
|
| 149 |
step=64,
|
| 150 |
value=512,
|
| 151 |
)
|
| 152 |
+
cfg = gr.Slider(
|
| 153 |
+
label="CFG (guidance scale)",
|
| 154 |
+
minimum=0.0,
|
| 155 |
+
maximum=20.0,
|
| 156 |
+
step=0.5,
|
| 157 |
+
value=DEFAULT_CFG,
|
| 158 |
+
info="Higher = closer to text, lower = more creative",
|
| 159 |
+
)
|
| 160 |
|
| 161 |
with gr.Row():
|
| 162 |
result_1 = gr.Image(
|
|
|
|
| 167 |
format="png",
|
| 168 |
)
|
| 169 |
|
|
|
|
| 170 |
examples = [
|
| 171 |
[
|
| 172 |
+
"Studio portrait of an elderly sailor with a weathered face, dramatic Rembrandt lighting, shallow depth of field",
|
| 173 |
+
101, 512, 512, 32, 7.5,
|
| 174 |
+
"photorealistic, sharp eyes, detailed skin texture, soft rim light, 85mm lens",
|
| 175 |
+
"over-smoothed skin, plastic look, extra limbs, watermark",
|
| 176 |
+
],
|
| 177 |
+
[
|
| 178 |
+
"Isometric cozy coffee shop interior with hanging plants and warm Edison bulbs",
|
| 179 |
+
202, 512, 384, 30, 8.5,
|
| 180 |
+
"isometric view, clean lines, stylized, warm ambience, detailed furniture",
|
| 181 |
+
"text, logo, watermark, perspective distortion",
|
| 182 |
+
],
|
| 183 |
+
[
|
| 184 |
+
"Ultra-wide desert canyon at golden hour with long shadows and dust in the air",
|
| 185 |
+
303, 512, 320, 28, 7.0,
|
| 186 |
+
"cinematic, volumetric light, natural colors, high dynamic range",
|
| 187 |
+
"over-saturated, haze artifacts, blown highlights",
|
| 188 |
+
],
|
| 189 |
+
[
|
| 190 |
+
"Cute red panda astronaut sticker, chibi style, white background",
|
| 191 |
+
404, 384, 384, 24, 9.0,
|
| 192 |
+
"vector look, bold outlines, high contrast, die-cut silhouette",
|
| 193 |
+
"background clutter, drop shadow, gradients, text",
|
| 194 |
+
],
|
| 195 |
+
[
|
| 196 |
+
"Product render of matte-black wireless headphones on reflective glass with soft studio lighting",
|
| 197 |
+
505, 512, 384, 28, 7.0,
|
| 198 |
+
"clean backdrop, realistic reflections, subtle bloom, high detail",
|
| 199 |
+
"noise, fingerprints, text, label",
|
| 200 |
+
],
|
| 201 |
+
[
|
| 202 |
+
"Graphic poster in Bauhaus style with geometric shapes and bold typography placeholders",
|
| 203 |
+
606, 512, 512, 22, 6.0,
|
| 204 |
+
"flat colors, minimal palette, crisp edges, balanced composition",
|
| 205 |
+
"photo realism, gradients, noisy texture",
|
| 206 |
+
],
|
| 207 |
+
[
|
| 208 |
+
"Oil painting of a stormy sea with a lighthouse, thick impasto brushwork",
|
| 209 |
+
707, 384, 512, 34, 7.0,
|
| 210 |
+
"textured canvas, visible brush strokes, dramatic sky, moody lighting",
|
| 211 |
+
"smooth digital look, airbrush, neon colors",
|
| 212 |
+
],
|
| 213 |
+
[
|
| 214 |
+
"Architectural concept art: glass pavilion in a pine forest at dawn, ground fog",
|
| 215 |
+
808, 512, 384, 30, 8.0,
|
| 216 |
+
"physically-based rendering, soft fog, realistic materials, scale figures",
|
| 217 |
+
"tilt, skew, warped geometry, chromatic aberration",
|
| 218 |
],
|
| 219 |
[
|
| 220 |
+
"Fantasy creature: bioluminescent jellyfish dragon swimming through a dark ocean trench",
|
| 221 |
+
909, 512, 512, 32, 8.5,
|
| 222 |
+
"glowing tendrils, soft caustics, particles, high detail",
|
| 223 |
+
"washed out, murky, low contrast, extra heads",
|
| 224 |
],
|
| 225 |
[
|
| 226 |
+
"Line art coloring page of a city skyline with hot air balloons",
|
| 227 |
+
111, 512, 512, 18, 5.5,
|
| 228 |
+
"clean black outlines, uniform stroke weight, high contrast, no shading",
|
| 229 |
+
"gray fill, gradients, cross-hatching, text",
|
| 230 |
],
|
| 231 |
]
|
| 232 |
|
|
|
|
| 238 |
width,
|
| 239 |
height,
|
| 240 |
num_inference_steps,
|
| 241 |
+
cfg,
|
| 242 |
positive_prompt,
|
| 243 |
negative_prompt,
|
| 244 |
],
|
|
|
|
| 257 |
width,
|
| 258 |
height,
|
| 259 |
num_inference_steps,
|
| 260 |
+
cfg,
|
| 261 |
positive_prompt,
|
| 262 |
negative_prompt,
|
| 263 |
],
|