from diffusers import StableDiffusionInpaintPipeline
from PIL import Image
import torch
import numpy as np
import gc
#import pose_estimation as pe
import requests
from rembg import remove
from transformers import BlipProcessor, BlipForConditionalGeneration
import sys
import os
import subprocess

# Make the cloth-segmentation submodule importable.
sys.path.append(
    os.path.join(os.path.dirname(__file__), "huggingface-cloth-segmentation"))
from process import load_seg_model, get_palette, generate_mask
device = 'cpu'

def initialize_and_load_models():
    # Load the pretrained cloth-segmentation checkpoint once at startup.
    checkpoint_path = 'model/cloth_segm.pth'
    net = load_seg_model(checkpoint_path, device=device)
    return net

net = initialize_and_load_models()
palette = get_palette(4)
def run(img):
    # Segment the clothing in `img`; generate_mask also writes its alpha
    # masks to ./huggingface-cloth-segmentation/output/alpha/.
    cloth_seg = generate_mask(img, net=net, palette=palette, device=device)
    return cloth_seg
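# A minimal usage sketch (hypothetical input path); the mask files that
# gen_vton() reads below are the side effect of this call:
#
#   seg = run(Image.open("./inputs/person.jpg"))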
def image_caption(image_path, img_type):
    # Caption an image with the BLIP-based FuseCap model.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    processor = BlipProcessor.from_pretrained("noamrot/FuseCap")
    model = BlipForConditionalGeneration.from_pretrained("noamrot/FuseCap").to(device)
    raw_image = Image.open(image_path).convert('RGB')
    if img_type == "dress":
        # Strip the background so the caption describes only the garment.
        raw_image = remove(raw_image)
        print("bg removed")
        raw_image.show()  # debug preview of the background-removed image
    text = "a picture of "
    inputs = processor(raw_image, text, return_tensors="pt").to(device)
    out = model.generate(**inputs, num_beams=3)
    caption = processor.decode(out[0], skip_special_tokens=True)
    print(caption)
    return caption
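# A quick sketch of how the two caption modes are meant to be used
# (hypothetical paths): "dress" triggers background removal first, so the
# caption describes only the clothing.
#
#   user_caption = image_caption("./inputs/person.jpg", "user")
#   dress_caption = image_caption("./inputs/dress.jpg", "dress")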
def gen_vton(image_input, dress_input):
    # Load the pre-trained inpainting model.
    pipe = StableDiffusionInpaintPipeline.from_pretrained(
        "runwayml/stable-diffusion-inpainting",
        torch_dtype=torch.float32,  # float32 for CPU; use float16 on GPU
    )
    image_path = image_input
    img_open = Image.open(image_path)
    # Segment the clothing; the masks land in the output/alpha directory.
    run(img_open)
    gen_mask_1 = "./huggingface-cloth-segmentation/output/alpha/1.png"
    gen_mask_2 = "./huggingface-cloth-segmentation/output/alpha/2.png"
    gen_mask_3 = "./huggingface-cloth-segmentation/output/alpha/3.png"
    print("mask generated")
    # Pick the first mask file that actually exists on disk.
    if os.path.exists(gen_mask_1):
        mask_path = gen_mask_1
    elif os.path.exists(gen_mask_2):
        mask_path = gen_mask_2
    else:
        mask_path = gen_mask_3
    dress_path = dress_input
    image = Image.open(image_path)
    mask = Image.open(mask_path).convert("L")  # grayscale inpainting mask
    # The inpainting pipeline expects 512x512 inputs.
    image = image.resize((512, 512))
    mask = mask.resize((512, 512))
    # Build the prompts (text inputs) from the image captions.
    user_caption = image_caption(image_path, "user")
    dress_caption = image_caption(dress_path, "dress")
    print(user_caption)
    print(dress_caption)
    prompt = f"a human wearing a {dress_caption}"
    neg_prompt = user_caption
    # Note: `image` and `mask_image` must be PIL images. In the mask, white
    # marks the region to inpaint and black the region to keep as is.
    guidance_scale = 7.5
    strength = 0.9  # diffusers exposes the denoising strength as `strength`
    num_samples = 2
    generator = torch.Generator(device="cpu")  # explicitly create a CPU generator
    images = pipe(
        prompt=prompt,
        negative_prompt=neg_prompt,
        image=image,
        mask_image=mask,
        guidance_scale=guidance_scale,
        strength=strength,
        generator=generator,
        num_images_per_prompt=num_samples,
    ).images
    # Save the generated samples; create the output directory if needed.
    os.makedirs("./processed_images", exist_ok=True)
    images[0].save("./processed_images/output_image.jpg")
    images[1].save("./processed_images/output_image_1.jpg")
    return images
if __name__ == "__main__":
    # Example invocation; replace the paths with your own files.
    user_image = "C:/Users/Admin/Downloads/woman.jpg"
    dress_image = "C:/Users/Admin/Downloads/dress1.jpg"
    gen_vton(user_image, dress_image)
def predict(dict, prompt):
    # UI-style entry point: `dict` carries the user image and the painted mask.
    image = dict['image'].convert("RGB").resize((512, 512))
    mask_image = dict['mask'].convert("RGB").resize((512, 512))
    pipe = StableDiffusionInpaintPipeline.from_pretrained(
        "runwayml/stable-diffusion-inpainting", torch_dtype=torch.float32)
    images = pipe(prompt=prompt, image=image, mask_image=mask_image).images
    return images[0]
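# A minimal local usage sketch for predict() (hypothetical file names),
# mirroring the {'image': ..., 'mask': ...} dict a UI layer would pass:
#
#   person = Image.open("person.jpg")
#   mask = Image.open("mask.png")
#   result = predict({'image': person, 'mask': mask}, "a red evening dress")
#   result.save("predict_out.jpg")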