Spaces:

yeq6x
/

QIE-LoRA-training-with-musubi-tuner

Running on Zero

QIE-LoRA-training-with-musubi-tuner / QIE_prompt_generator.py

Refactor Gradio UI in QIE_prompt_generator.py to encapsulate the layout within a build_ui function, enhancing modularity and readability. The update includes improved organization of input fields and maintains the existing functionality for prompt generation.

ff3f921 12 days ago

raw

history blame contribute delete

6.21 kB

	import base64
	import mimetypes
	from pathlib import Path
	import gradio as gr
	from typing import Optional, Tuple

	# ===== ユーティリティ =====

	def file_to_data_url(path: str) -> Optional[str]:
	    """Encode an image file as a base64 ``data:`` URL.

	    Args:
	        path: Filesystem path of the image. May be empty or ``None``.

	    Returns:
	        A ``data:<mime>;base64,<payload>`` string, or ``None`` when ``path``
	        is empty or does not point to a regular file.
	    """
	    if not path:
	        return None
	    image_path = Path(path)
	    # is_file() rather than exists(): a directory "exists" but cannot be
	    # read as bytes, and would otherwise raise instead of yielding None.
	    if not image_path.is_file():
	        return None
	    # Fall back to PNG when the type cannot be guessed from the file name.
	    mime = mimetypes.guess_type(str(image_path))[0] or "image/png"
	    payload = base64.b64encode(image_path.read_bytes()).decode("ascii")
	    return f"data:{mime};base64,{payload}"


	# ===== Meta Prompt =====
	# System prompt for the chat model: call_openai_chat sends it as the
	# "system" message ahead of the images and notes.
	# NOTE: the section labels under "Output Format" ("English Prompt:",
	# "Name Suggestions:", "Japanese Translation:") are the markers that
	# call_openai_chat searches for when splitting the reply, so the labels
	# and that parser have to be changed together.
	META_PROMPT = """You are an AI prompt generator for image-to-image transformation tasks in art and illustration pipelines.
	Given two images — A (input) and B (output) — and an optional description or notes, your goal is to write a precise, structured English prompt that fully explains how A transforms into B, including both conceptual and visual rules of the transformation.

	Instructions:
	1) Write 3–6 concise sentences describing the transformation.
	2) Begin with “The [type of A] transforms into …” or “Convert the [A] into …”.
	3) Clearly describe:
	- What disappears or remains (e.g., hair, clothes, shadows, lines).
	- What structural or stylistic simplifications occur (e.g., box, sphere, guide lines).
	- How lines, colors, or lighting change (e.g., colored → lineart, flat → shaded).
	- Any rules for anatomy, proportion, or rendering (e.g., keep pose, maintain base colors).
	- Background or presentation constraints (e.g., white background, no shading).
	4) Integrate any user-provided notes naturally into the text.

	Output Format:
	- English Prompt: A full paragraph (3–6 sentences) describing the transformation from A to B.
	- Name Suggestions: Propose 3 short, descriptive task-name candidates (e.g., Image2Body, Body2Box, Sketch2Line, Flat2Shade, Guide2Color, Light2Render, etc.).
	- Optional Japanese Translation: Provide a brief Japanese version of the English prompt for understanding if requested.

	Make the wording professional, objective, and consistent with technical art pipeline prompts.
	"""


	# ===== OpenAI 呼び出し =====
	def _split_model_reply(text: str) -> Tuple[str, str, str]:
	    """Split a model reply into its (english, names, japanese) sections.

	    Section labels are matched case-insensitively and may carry list or
	    emphasis markers ("- ", "**"). When no "Name Suggestions" label is
	    present, the whole reply is returned as the English prompt.
	    """
	    import re  # local import so this block does not depend on file-level imports

	    def find_label(words: str, haystack: str):
	        # Optionally absorb bullet/emphasis markers that start the label's
	        # line, so they do not end up as residue in the previous section.
	        pattern = r"(?:^[-*# \t]*)?" + words + r"\**[ \t]*:\**"
	        return re.search(pattern, haystack, re.IGNORECASE | re.MULTILINE)

	    names_label = find_label(r"name[ \t]+suggestions", text)
	    if names_label is None:
	        return text, "", ""

	    head = text[:names_label.start()]
	    tail = text[names_label.end():]

	    # Anything before the "English Prompt" label is preamble and dropped;
	    # if the label is missing, keep everything before the names section.
	    english_label = find_label(r"english[ \t]+prompt", head)
	    english = head[english_label.end():] if english_label else head

	    # The system prompt names this section "Optional Japanese Translation",
	    # so the "Optional" prefix is treated as part of the label.
	    japanese_label = find_label(
	        r"(?:\(?optional\)?[ \t]+)?japanese[ \t]+translation", tail
	    )
	    if japanese_label is None:
	        return english.strip(), tail.strip(), ""
	    return (
	        english.strip(),
	        tail[:japanese_label.start()].strip(),
	        tail[japanese_label.end():].strip(),
	    )


	def call_openai_chat(
	    api_key: str,
	    a_data_url: Optional[str],
	    b_data_url: Optional[str],
	    notes: str,
	    want_japanese: bool,
	) -> Tuple[str, str, str]:
	    """Ask the OpenAI chat model (fixed to ``gpt-5``) to describe the A-to-B change.

	    Args:
	        api_key: OpenAI API key; an empty value short-circuits with an error.
	        a_data_url: Data URL of image A (input), or ``None`` to omit it.
	        b_data_url: Data URL of image B (output), or ``None`` to omit it.
	        notes: Free-form user notes appended to the request.
	        want_japanese: Whether to request and return a Japanese translation.

	    Returns:
	        A ``(english, names, japanese)`` tuple of strings. On failure the
	        first two items are empty and the third carries the error message.
	    """
	    if not api_key:
	        return ("", "", "（エラー：API Key が未入力です）")

	    try:
	        from openai import OpenAI
	    except ImportError:
	        return ("", "", "（エラー：openai パッケージが見つかりません。`pip install openai` を実行してください）")

	    client = OpenAI(api_key=api_key)
	    model = "gpt-5"

	    # Each supplied image is preceded by a short caption identifying it.
	    user_content = []
	    captioned_images = (
	        ("Image A (input):", a_data_url),
	        ("Image B (output):", b_data_url),
	    )
	    for caption, url in captioned_images:
	        if url:
	            user_content.append({"type": "text", "text": caption})
	            user_content.append({"type": "image_url", "image_url": {"url": url}})

	    translation_hint = (
	        "Include Japanese translation." if want_japanese else "No Japanese translation."
	    )
	    user_content.append({
	        "type": "text",
	        "text": f"Additional notes: {notes or '(none)'}\n{translation_hint}",
	    })

	    try:
	        resp = client.chat.completions.create(
	            model=model,
	            messages=[
	                {"role": "system", "content": META_PROMPT},
	                {"role": "user", "content": user_content},
	            ],
	        )
	        text = resp.choices[0].message.content.strip()
	    except Exception as e:
	        # Any client/API failure is reported through the third output slot
	        # rather than raised to the caller.
	        return ("", "", f"（APIエラー：{e}）")

	    english, names, japanese = _split_model_reply(text)

	    if not want_japanese:
	        japanese = ""

	    return english, names, japanese


	# ===== Gradio UI (standalone) =====

	def build_ui() -> gr.Blocks:
	    """Assemble the standalone Gradio app and return it without launching.

	    The layout has an API-key field, two image inputs side by side, a notes
	    box, a Japanese-translation checkbox, a run button and three output
	    text boxes. Clicking the button converts the images to data URLs and
	    forwards everything to ``call_openai_chat``.
	    """
	    intro_markdown = """
	# 🎨 A→B 変換プロンプト自動生成
	画像A（入力）と画像B（出力）、補足説明を入力すると、
	A→B の変換内容を英語プロンプトとして自動生成し、
	さらにタスク名候補（3件）を提案します。
	モデルは `gpt-5` を使用します。
	"""

	    def on_click(api_key_in, a_path, b_path, notes_in, ja_flag):
	        # Images arrive as file paths; a missing image stays None.
	        a_url = None
	        if a_path:
	            a_url = file_to_data_url(a_path)
	        b_url = None
	        if b_path:
	            b_url = file_to_data_url(b_path)
	        return call_openai_chat(api_key_in, a_url, b_url, notes_in, ja_flag)

	    demo = gr.Blocks(title="A→B 変換プロンプト自動生成（GPT-5固定）")
	    with demo:
	        gr.Markdown(intro_markdown)

	        # --- inputs ---
	        key_box = gr.Textbox(
	            label="OpenAI API Key",
	            type="password",
	            placeholder="sk-...",
	        )
	        with gr.Row():
	            image_a = gr.Image(type="filepath", label="Image A (Input)", height=300)
	            image_b = gr.Image(type="filepath", label="Image B (Output)", height=300)

	        notes_box = gr.Textbox(
	            label="補足説明（日本語可）",
	            lines=4,
	            placeholder="例）髪・服・背景は消す。目は四角。鎖骨はピンク線。など",
	            value="この画像は例であって、汎用的なプロンプトにする",
	        )
	        japanese_toggle = gr.Checkbox(label="日本語訳を含める", value=True)
	        generate_button = gr.Button("生成する", variant="primary")

	        # --- outputs ---
	        english_box = gr.Textbox(label="English Prompt", lines=8)
	        names_box = gr.Textbox(label="Name Suggestions", lines=4)
	        japanese_box = gr.Textbox(label="日本語訳（任意）", lines=8)

	        generate_button.click(
	            fn=on_click,
	            inputs=[key_box, image_a, image_b, notes_box, japanese_toggle],
	            outputs=[english_box, names_box, japanese_box],
	        )

	    return demo


	if __name__ == "__main__":
	    # Script entry point: build the Gradio app, then start serving it.
	    app = build_ui()
	    app.launch()