Spaces:
Runtime error
Runtime error
| """ | |
| Using as reference: | |
| - https://huggingface.co/nvidia/segformer-b0-finetuned-ade-512-512 | |
| - https://huggingface.co/spaces/chansung/segformer-tf-transformers/blob/main/app.py | |
| - https://huggingface.co/facebook/detr-resnet-50-panoptic | |
| """ | |
| from transformers import DetrFeatureExtractor, DetrForSegmentation | |
| from PIL import Image | |
| import gradio as gr | |
| import numpy as np | |
| import torch | |
| import torchvision | |
| # Returns a list with a color per ADE class (150 classes) | |
| # from https://huggingface.co/spaces/chansung/segformer-tf-transformers/blob/main/app.py | |
def ade_palette():
    """Return the ADE20K colour table: 150 ``[R, G, B]`` lists, one per class index.

    A fresh list of fresh lists is built on every call, so callers may
    mutate the result without affecting later calls.
    """
    rgb_table = (
        # 0-4
        (120, 120, 120), (180, 120, 120), (6, 230, 230), (80, 50, 50), (4, 200, 3),
        # 5-9
        (120, 120, 80), (140, 140, 140), (204, 5, 255), (230, 230, 230), (4, 250, 7),
        # 10-14
        (224, 5, 255), (235, 255, 7), (150, 5, 61), (120, 120, 70), (8, 255, 51),
        # 15-19
        (255, 6, 82), (143, 255, 140), (204, 255, 4), (255, 51, 7), (204, 70, 3),
        # 20-24
        (0, 102, 200), (61, 230, 250), (255, 6, 51), (11, 102, 255), (255, 7, 71),
        # 25-29
        (255, 9, 224), (9, 7, 230), (220, 220, 220), (255, 9, 92), (112, 9, 255),
        # 30-34
        (8, 255, 214), (7, 255, 224), (255, 184, 6), (10, 255, 71), (255, 41, 10),
        # 35-39
        (7, 255, 255), (224, 255, 8), (102, 8, 255), (255, 61, 6), (255, 194, 7),
        # 40-44
        (255, 122, 8), (0, 255, 20), (255, 8, 41), (255, 5, 153), (6, 51, 255),
        # 45-49
        (235, 12, 255), (160, 150, 20), (0, 163, 255), (140, 140, 140), (250, 10, 15),
        # 50-54
        (20, 255, 0), (31, 255, 0), (255, 31, 0), (255, 224, 0), (153, 255, 0),
        # 55-59
        (0, 0, 255), (255, 71, 0), (0, 235, 255), (0, 173, 255), (31, 0, 255),
        # 60-64
        (11, 200, 200), (255, 82, 0), (0, 255, 245), (0, 61, 255), (0, 255, 112),
        # 65-69
        (0, 255, 133), (255, 0, 0), (255, 163, 0), (255, 102, 0), (194, 255, 0),
        # 70-74
        (0, 143, 255), (51, 255, 0), (0, 82, 255), (0, 255, 41), (0, 255, 173),
        # 75-79
        (10, 0, 255), (173, 255, 0), (0, 255, 153), (255, 92, 0), (255, 0, 255),
        # 80-84
        (255, 0, 245), (255, 0, 102), (255, 173, 0), (255, 0, 20), (255, 184, 184),
        # 85-89
        (0, 31, 255), (0, 255, 61), (0, 71, 255), (255, 0, 204), (0, 255, 194),
        # 90-94
        (0, 255, 82), (0, 10, 255), (0, 112, 255), (51, 0, 255), (0, 194, 255),
        # 95-99
        (0, 122, 255), (0, 255, 163), (255, 153, 0), (0, 255, 10), (255, 112, 0),
        # 100-104
        (143, 255, 0), (82, 0, 255), (163, 255, 0), (255, 235, 0), (8, 184, 170),
        # 105-109
        (133, 0, 255), (0, 255, 92), (184, 0, 255), (255, 0, 31), (0, 184, 255),
        # 110-114
        (0, 214, 255), (255, 0, 112), (92, 255, 0), (0, 224, 255), (112, 224, 255),
        # 115-119
        (70, 184, 160), (163, 0, 255), (153, 0, 255), (71, 255, 0), (255, 0, 163),
        # 120-124
        (255, 204, 0), (255, 0, 143), (0, 255, 235), (133, 255, 0), (255, 0, 235),
        # 125-129
        (245, 0, 255), (255, 0, 122), (255, 245, 0), (10, 190, 212), (214, 255, 0),
        # 130-134
        (0, 204, 255), (20, 0, 255), (255, 255, 0), (0, 153, 255), (0, 41, 255),
        # 135-139
        (0, 255, 204), (41, 0, 255), (41, 255, 0), (173, 0, 255), (0, 245, 255),
        # 140-144
        (71, 0, 255), (122, 0, 255), (0, 255, 184), (0, 92, 255), (184, 255, 0),
        # 145-149
        (0, 133, 255), (255, 214, 0), (25, 194, 194), (102, 255, 0), (92, 0, 255),
    )
    # Expand to mutable lists so the return type matches a plain list-of-lists.
    return [list(rgb) for rgb in rgb_table]
# Hub id of the pretrained DETR (ResNet-50 backbone) panoptic-segmentation
# checkpoint. The preprocessor and the model are loaded from the same id so
# they cannot drift apart.
_DETR_CHECKPOINT = 'facebook/detr-resnet-50-panoptic'
feature_extractor = DetrFeatureExtractor.from_pretrained(_DETR_CHECKPOINT)
model = DetrForSegmentation.from_pretrained(_DETR_CHECKPOINT)

# gradio components
# The legacy `gr.inputs` / `gr.outputs` namespaces were removed in Gradio 4.
# Keep using them where they exist (behaviour unchanged on older installs) and
# fall back to the unified `gr.Image` component otherwise, so the module still
# imports on current Gradio releases.
# NOTE: the names `input` / `output` shadow Python builtins; they are kept
# because the `gr.Interface(...)` call in this file refers to them.
if hasattr(gr, "inputs") and hasattr(gr, "outputs"):
    input = gr.inputs.Image()
    output = gr.outputs.Image()
else:
    input = gr.Image()
    output = gr.Image()
def predict_animal_mask(im):
    """Overlay a colour-coded DETR segmentation mask on a 200x200 copy of ``im``.

    Args:
        im: Input image as a NumPy array accepted by ``PIL.Image.fromarray``
            (e.g. height x width x 3, as delivered by the Gradio image input).

    Returns:
        A ``uint8`` NumPy array of shape (200, 200, 3): a 50/50 blend of the
        resized RGB image and the colour mask.

    NOTE(review): the per-pixel index is the argmax over the *query* axis of
    ``pred_masks``, i.e. it identifies a DETR query slot, not a semantic class
    id. The ADE20K palette is therefore only used to tell segments apart.
    """
    # The image is shrunk to 200x200; the inline shape notes below were
    # recorded for that input size.
    image = Image.fromarray(im).resize((200, 200))
    # Force three channels so that preprocessing and blending both see RGB,
    # even for grayscale or RGBA uploads.
    image = image.convert('RGB')

    inputs = feature_extractor(images=image, return_tensors="pt")  # pt=PyTorch, tf=TensorFlow
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)
    masks = outputs.pred_masks  # torch.Size([1, 100, 200, 200])

    # Winning query slot per pixel for the single image in the batch.
    label_per_pixel = torch.argmax(masks[0], dim=0).cpu().numpy()

    # One table lookup instead of a boolean-mask pass per palette entry.
    # Indices beyond the palette length wrap around rather than failing.
    palette = np.asarray(ade_palette(), dtype=np.uint8)
    color_mask = palette[label_per_pixel % len(palette)]

    rgb = np.asarray(image)
    if color_mask.shape[:2] != rgb.shape[:2]:
        # Mask resolution is decided by the model, not by the resize above;
        # bring it to the image size (nearest neighbour keeps colours crisp).
        color_mask = np.asarray(
            Image.fromarray(color_mask).resize(image.size, Image.NEAREST)
        )

    # Show image + mask
    pred_img = rgb * 0.5 + color_mask * 0.5
    return pred_img.astype(np.uint8)
####################################################
# Build the web UI around the prediction function, then start serving it.
demo = gr.Interface(
    fn=predict_animal_mask,
    inputs=input,
    outputs=output,
    title='Animals* segmentation in images',
    description="An animal* segmentation image webapp using DETR (End-to-End Object Detection) model with ResNet-50 backbone",
)
demo.launch()
| #################################### | |
| # url = "http://images.cocodataset.org/val2017/000000039769.jpg" | |
| # image = Image.open(requests.get(url, stream=True).raw) | |
| # inputs = feature_extractor(images=image, return_tensors="pt") | |
| # outputs = model(**inputs) | |
| # logits = outputs.logits # shape (batch_size, num_queries, num_classes + 1) | |