"""Gradio tool that asks an OpenAI chat model to describe an A→B image transformation.

The user supplies two images (A = input, B = output) plus optional notes; the
model is asked to produce an English prompt, task-name suggestions, and an
optional Japanese translation, which are then split into three text boxes.
"""

import base64
import mimetypes
import re
from pathlib import Path
from typing import Optional, Tuple

import gradio as gr


# ===== Utilities =====
def file_to_data_url(path: str) -> Optional[str]:
    """Convert an image file into a ``data:`` URL.

    Args:
        path: Filesystem path of the image. May be empty or ``None``.

    Returns:
        A ``data:<mime>;base64,<payload>`` string, or ``None`` when ``path``
        is empty or does not point to a regular file.
    """
    if not path:
        return None
    p = Path(path)
    # is_file() (rather than exists()) so a directory path yields None
    # instead of raising when we try to read it.
    if not p.is_file():
        return None
    mime, _ = mimetypes.guess_type(str(p))
    if not mime:
        # Unknown extension: fall back to PNG, as the UI only uploads images.
        mime = "image/png"
    b64 = base64.b64encode(p.read_bytes()).decode("utf-8")
    return f"data:{mime};base64,{b64}"


# ===== Meta Prompt =====
META_PROMPT = """You are an AI prompt generator for image-to-image transformation tasks in art and illustration pipelines.
Given two images — A (input) and B (output) — and an optional description or notes,
your goal is to write a precise, structured English prompt that fully explains how A transforms into B,
including both conceptual and visual rules of the transformation.

Instructions:
1) Write 3–6 concise sentences describing the transformation.
2) Begin with “The [type of A] transforms into …” or “Convert the [A] into …”.
3) Clearly describe:
   - What disappears or remains (e.g., hair, clothes, shadows, lines).
   - What structural or stylistic simplifications occur (e.g., box, sphere, guide lines).
   - How lines, colors, or lighting change (e.g., colored → lineart, flat → shaded).
   - Any rules for anatomy, proportion, or rendering (e.g., keep pose, maintain base colors).
   - Background or presentation constraints (e.g., white background, no shading).
4) Integrate any user-provided notes naturally into the text.

Output Format:
- English Prompt: A full paragraph (3–6 sentences) describing the transformation from A to B.
- Name Suggestions: Propose 3 short, descriptive task-name candidates (e.g., Image2Body, Body2Box, Sketch2Line, Flat2Shade, Guide2Color, Light2Render, etc.).
- Optional Japanese Translation: Provide a brief Japanese version of the English prompt for understanding if requested.

Make the wording professional, objective, and consistent with technical art pipeline prompts.
"""

# Model used when the caller does not pick one.
DEFAULT_MODEL = "gpt-5"

# Matches the section labels requested in META_PROMPT, case-insensitively.
# Leading bullet/markdown decoration is consumed only at the start of a line,
# and "**" around the colon is tolerated, so "- English Prompt:" and
# "**Name Suggestions:**" are both recognised.
_SECTION_LABEL_RE = re.compile(
    r"(?:^[ \t]*(?:[-*•#]+[ \t]*)+)?"
    r"(english prompt|name suggestions|(?:optional[ \t]+)?japanese translation)"
    r"[ \t]*\**[ \t]*[::][ \t]*\**",
    re.IGNORECASE | re.MULTILINE,
)


def _parse_response(text: str) -> Tuple[str, str, str]:
    """Split a model reply into (english, names, japanese) sections.

    If no "Name Suggestions" label is present the whole reply is returned as
    the English prompt. Otherwise each labelled section runs up to the next
    label; text before the first label is used as the English prompt when no
    "English Prompt" label was emitted.
    """
    first_hit = {}
    for match in _SECTION_LABEL_RE.finditer(text):
        label = match.group(1).lower()
        if label.startswith("english"):
            key = "english"
        elif label.startswith("name"):
            key = "names"
        else:
            key = "japanese"
        # Only the first occurrence of each label acts as a section boundary.
        first_hit.setdefault(key, match)

    if "names" not in first_hit:
        return text.strip(), "", ""

    ordered = sorted(first_hit.items(), key=lambda item: item[1].start())
    stops = [hit.start() for _, hit in ordered[1:]] + [len(text)]
    sections = {"english": "", "names": "", "japanese": ""}
    for (key, hit), stop in zip(ordered, stops):
        sections[key] = text[hit.end():stop].strip()

    if "english" not in first_hit:
        # No explicit label: treat everything before the first label as the prompt.
        sections["english"] = text[:ordered[0][1].start()].strip()

    return sections["english"], sections["names"], sections["japanese"]


# ===== OpenAI call =====
def call_openai_chat(
    api_key: str,
    a_data_url: Optional[str],
    b_data_url: Optional[str],
    notes: str,
    want_japanese: bool,
    model: str = DEFAULT_MODEL,
) -> Tuple[str, str, str]:
    """Ask the chat model to describe the A→B transformation.

    Args:
        api_key: OpenAI API key.
        a_data_url: Data URL of image A (input), or ``None`` to omit it.
        b_data_url: Data URL of image B (output), or ``None`` to omit it.
        notes: Free-form user notes appended to the request.
        want_japanese: Whether to request and return a Japanese translation.
        model: Chat model name; defaults to ``DEFAULT_MODEL``.

    Returns:
        ``(english_prompt, name_suggestions, japanese_translation)``. On
        failure the first two items are empty and the third carries a
        human-readable error message.
    """
    if not api_key:
        return ("", "", "(エラー:API Key が未入力です)")

    # Imported lazily so the UI can still start (and explain the problem)
    # when the optional dependency is missing.
    try:
        from openai import OpenAI
    except ImportError:
        return ("", "", "(エラー:openai パッケージが見つかりません。`pip install openai` を実行してください)")

    client = OpenAI(api_key=api_key)

    user_content = []
    if a_data_url:
        user_content.append({"type": "text", "text": "Image A (input):"})
        user_content.append({"type": "image_url", "image_url": {"url": a_data_url}})
    if b_data_url:
        user_content.append({"type": "text", "text": "Image B (output):"})
        user_content.append({"type": "image_url", "image_url": {"url": b_data_url}})

    translation_hint = (
        "Include Japanese translation." if want_japanese else "No Japanese translation."
    )
    user_content.append({
        "type": "text",
        "text": f"Additional notes: {notes or '(none)'}\n{translation_hint}",
    })

    # Broad catch is intentional: this is the UI boundary and any API/network
    # failure should be shown to the user rather than crash the callback.
    try:
        resp = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": META_PROMPT},
                {"role": "user", "content": user_content},
            ],
        )
        content = resp.choices[0].message.content
    except Exception as e:
        return ("", "", f"(APIエラー:{e})")

    if content is None:
        # The API may return no text content (e.g. a refusal).
        return ("", "", "(APIエラー:応答が空です)")

    english, names, japanese = _parse_response(content.strip())
    if not want_japanese:
        japanese = ""
    return english, names, japanese


# ===== Gradio UI (standalone) =====
def build_ui() -> gr.Blocks:
    """Build the Gradio Blocks app and wire the generate button to the API call."""
    with gr.Blocks(title="A→B 変換プロンプト自動生成(GPT-5固定)") as demo:
        gr.Markdown(
            "\n"
            "# 🎨 A→B 変換プロンプト自動生成\n"
            "画像A(入力)と画像B(出力)、補足説明を入力すると、\n"
            "A→B の変換内容を**英語プロンプト**として自動生成し、\n"
            "さらに**タスク名候補(3件)**を提案します。\n"
            "モデルは `gpt-5` を使用します。\n"
        )

        api_key = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")

        with gr.Row():
            img_a = gr.Image(type="filepath", label="Image A (Input)", height=300)
            img_b = gr.Image(type="filepath", label="Image B (Output)", height=300)

        notes = gr.Textbox(
            label="補足説明(日本語可)",
            lines=4,
            placeholder="例)髪・服・背景は消す。目は四角。鎖骨はピンク線。など",
            value="この画像は例であって、汎用的なプロンプトにする",
        )
        want_japanese = gr.Checkbox(label="日本語訳を含める", value=True)
        run_btn = gr.Button("生成する", variant="primary")

        english_out = gr.Textbox(label="English Prompt", lines=8)
        names_out = gr.Textbox(label="Name Suggestions", lines=4)
        japanese_out = gr.Textbox(label="日本語訳(任意)", lines=8)

        def on_click(api_key_in, a_path, b_path, notes_in, ja_flag):
            """Encode the uploaded images and forward everything to the model."""
            # file_to_data_url already returns None for empty/missing paths.
            a_url = file_to_data_url(a_path)
            b_url = file_to_data_url(b_path)
            return call_openai_chat(api_key_in, a_url, b_url, notes_in, ja_flag)

        run_btn.click(
            fn=on_click,
            inputs=[api_key, img_a, img_b, notes, want_japanese],
            outputs=[english_out, names_out, japanese_out],
        )

    return demo


if __name__ == "__main__":
    build_ui().launch()