Commit e021555
Parent(s): 321feeb

v04 update

Files changed:
- app.py +222 -68
- requirements.txt +11 -5
app.py CHANGED
@@ -1,19 +1,22 @@
-# Gradio YOLOv5 Det v0.
+# Gradio YOLOv5 Det v0.4
 # author: Zeng Yifu(曾逸夫)
-# creation time: 2022-05-
+# creation time: 2022-05-28
 # email: [email protected]
 # project homepage: https://gitee.com/CV_Lab/gradio_yolov5_det

-
 import argparse
 import csv
+import gc
 import json
+import os
 import sys
 from collections import Counter
 from pathlib import Path
-import pandas as pd

+import cv2
 import gradio as gr
+import numpy as np
+import pandas as pd
 import torch
 import yaml
 from PIL import Image, ImageDraw, ImageFont
@@ -21,13 +24,13 @@ from PIL import Image, ImageDraw, ImageFont
 from util.fonts_opt import is_fonts
 from util.pdf_opt import pdf_generate

-ROOT_PATH = sys.path[0]
+ROOT_PATH = sys.path[0]  # root directory

 # model path
 model_path = "ultralytics/yolov5"

 # Gradio YOLOv5 Det version
-GYD_VERSION = "Gradio YOLOv5 Det v0.
+GYD_VERSION = "Gradio YOLOv5 Det v0.4"

 # model name temporary variable
 model_name_tmp = ""
@@ -46,8 +49,9 @@ obj_style = ["Small Object", "Medium Object", "Large Object"]


 def parse_args(known=False):
-    parser = argparse.ArgumentParser(description="Gradio YOLOv5 Det v0.
+    parser = argparse.ArgumentParser(description="Gradio YOLOv5 Det v0.4")
     parser.add_argument("--source", "-src", default="upload", type=str, help="input source")
+    parser.add_argument("--source_video", "-src_v", default="webcam", type=str, help="video input source")
     parser.add_argument("--img_tool", "-it", default="editor", type=str, help="input image tool")
     parser.add_argument("--model_name", "-mn", default="yolov5s", type=str, help="model name")
     parser.add_argument(
@@ -117,10 +121,10 @@ def yaml_csv(file_path, file_tag):
     file_suffix = Path(file_path).suffix
     if file_suffix == suffix_list[0]:
         # model name
-        file_names = [i[0] for i in list(csv.reader(open(file_path)))]
+        file_names = [i[0] for i in list(csv.reader(open(file_path)))]  # csv version
     elif file_suffix == suffix_list[1]:
         # model name
-        file_names = yaml_parse(file_path).get(file_tag)
+        file_names = yaml_parse(file_path).get(file_tag)  # yaml version
     else:
         print(f"{file_path} is not in the correct format! Program exits!")
         sys.exit()
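Note: the csv branch above never closes the file handle it passes to csv.reader. A minimal equivalent using a context manager (a sketch, not part of this commit):

    with open(file_path) as f:
        file_names = [row[0] for row in csv.reader(f)]  # first column of each row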
@@ -132,9 +136,7 @@ def yaml_csv(file_path, file_tag):
 def model_loading(model_name, device):

     # load model
-    model = torch.hub.load(
-        model_path, model_name, force_reload=True, device=device, _verbose=False
-    )
+    model = torch.hub.load(model_path, model_name, force_reload=True, device=device, _verbose=False)

     return model

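Note: force_reload=True re-downloads the ultralytics/yolov5 hub repo on every call, trading startup time for an always-fresh copy. For reference, a self-contained sketch of loading and querying such a model (the sample image URL is illustrative):

    import torch

    model = torch.hub.load("ultralytics/yolov5", "yolov5s", force_reload=True, _verbose=False)
    model.conf = 0.25  # NMS confidence threshold
    results = model("https://ultralytics.com/images/zidane.jpg", size=640)
    results.print()  # per-image detection summary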
@@ -162,15 +164,15 @@ def pil_draw(img, countdown_msg, textFont, xyxy, font_size, opt):

     img_pil = ImageDraw.Draw(img)

-    img_pil.rectangle(xyxy, fill=None, outline="green")
+    img_pil.rectangle(xyxy, fill=None, outline="green")  # bounding box

     if "label" in opt:
-        text_w, text_h = textFont.getsize(countdown_msg)
+        text_w, text_h = textFont.getsize(countdown_msg)  # Label size
         img_pil.rectangle(
             (xyxy[0], xyxy[1], xyxy[0] + text_w, xyxy[1] + text_h),
             fill="green",
             outline="green",
-        )
+        )  # label background
         img_pil.multiline_text(
             (xyxy[0], xyxy[1]),
             countdown_msg,
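Note: FreeTypeFont.getsize() was deprecated in Pillow 9.2 and removed in Pillow 10, so this hunk fails on current Pillow. A sketch of the equivalent measurement with getbbox():

    left, top, right, bottom = textFont.getbbox(countdown_msg)
    text_w, text_h = right - left, bottom - top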
@@ -183,7 +185,7 @@ def pil_draw(img, countdown_msg, textFont, xyxy, font_size, opt):


 # YOLOv5 image detection function
-def yolo_det(img, device, model_name, infer_size, conf, iou, max_num, model_cls, opt):
+def yolo_det_img(img, device, model_name, infer_size, conf, iou, max_num, model_cls, opt):

     global model, model_name_tmp, device_tmp

@@ -203,15 +205,15 @@ def yolo_det(img, device, model_name, infer_size, conf, iou, max_num, model_cls,
         model = model_loading(model_name_tmp, device)

     # -------------Model tuning -------------
-    model.conf = conf
-    model.iou = iou
-    model.max_det = int(max_num)
-    model.classes = model_cls
-
-    img_size = img.size
-
-    results = model(img, size=infer_size)
-
+    model.conf = conf  # NMS confidence threshold
+    model.iou = iou  # NMS IoU threshold
+    model.max_det = int(max_num)  # Maximum number of detection frames
+    model.classes = model_cls  # model classes
+
+    img_size = img.size  # frame size
+
+    results = model(img, size=infer_size)  # detection
+
     # Data Frame
     dataframe = results.pandas().xyxy[0].round(2)

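Note: results.pandas().xyxy[0] is the yolov5 hub API's per-detection table, one row per box; .round(2) only rounds the float columns for display. Its columns, for reference:

    dataframe = results.pandas().xyxy[0].round(2)
    print(dataframe.columns.tolist())
    # ['xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name']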
@@ -231,9 +233,9 @@ def yolo_det(img, device, model_name, infer_size, conf, iou, max_num, model_cls,

     for result in results.xyxyn:
         for i in range(len(result)):
-            id = int(i)
-            obj_cls_index = int(result[i][5])
-            obj_cls = model_cls_name_cp[obj_cls_index]
+            id = int(i)  # instance ID
+            obj_cls_index = int(result[i][5])  # category index
+            obj_cls = model_cls_name_cp[obj_cls_index]  # category
             cls_det_stat.append(obj_cls)

             # ------------ border coordinates ------------
@@ -248,7 +250,7 @@ def yolo_det(img, device, model_name, infer_size, conf, iou, max_num, model_cls,
             x1 = int(img_size[0] * x1)
             y1 = int(img_size[1] * y1)

-            conf = float(result[i][4])
+            conf = float(result[i][4])  # confidence
             # fps = f"{(1000 / float(results.t[1])):.2f}" # FPS

             det_img = pil_draw(
@@ -267,9 +269,10 @@ def yolo_det(img, device, model_name, infer_size, conf, iou, max_num, model_cls,
         area_obj_all.append(area_obj)

     # ------------JSON generate------------
-    det_json = export_json(results, img.size)[0]
-    det_json_format = json.dumps(det_json, sort_keys=False, indent=4, separators=(",", ":"),
-                                 ensure_ascii=False)
+    det_json = export_json(results, img.size)[0]  # Detection information
+    det_json_format = json.dumps(det_json, sort_keys=False, indent=4, separators=(",", ":"),
+                                 ensure_ascii=False)  # JSON formatting
+
     if "json" not in opt:
         det_json = None

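Note: ensure_ascii=False is what keeps non-ASCII class names (the app ships zh/ko/ru/es/ar label sets, per the font loading below) human-readable in the JSON output:

    import json

    json.dumps({"name": "人"})                      # '{"name": "\\u4eba"}'
    json.dumps({"name": "人"}, ensure_ascii=False)  # '{"name": "人"}'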
@@ -301,16 +304,115 @@ def yolo_det(img, device, model_name, infer_size, conf, iou, max_num, model_cls,
     for k, v in clsDet_dict.items():
         clsRatio_dict[k] = v / clsDet_dict_sum

-
     return det_img, objSize_dict, clsRatio_dict, det_json, report, dataframe


+# YOLOv5 video detection function
+def yolo_det_video(video, device, model_name, infer_size, conf, iou, max_num, model_cls, opt):
+
+    global model, model_name_tmp, device_tmp
+
+    os.system("""
+        if [ -e './output.mp4' ]; then
+            rm ./output.mp4
+        fi
+        """)
+
+    if model_name_tmp != model_name:
+        # Model judgment to avoid repeated loading
+        model_name_tmp = model_name
+        model = model_loading(model_name_tmp, device)
+    elif device_tmp != device:
+        device_tmp = device
+        model = model_loading(model_name_tmp, device)
+
+    # -------------Model tuning -------------
+    model.conf = conf  # NMS confidence threshold
+    model.iou = iou  # NMS IOU threshold
+    model.max_det = int(max_num)  # Maximum number of detection frames
+    model.classes = model_cls  # model classes
+
+    # ----------------Load fonts----------------
+    yaml_index = cls_name.index(".yaml")
+    cls_name_lang = cls_name[yaml_index - 2:yaml_index]
+
+    if cls_name_lang == "zh":
+        # Chinese
+        textFont = ImageFont.truetype(str(f"{ROOT_PATH}/fonts/SimSun.ttf"), size=FONTSIZE)
+    elif cls_name_lang in ["en", "ru", "es", "ar"]:
+        # English, Russian, Spanish, Arabic
+        textFont = ImageFont.truetype(str(f"{ROOT_PATH}/fonts/TimesNewRoman.ttf"), size=FONTSIZE)
+    elif cls_name_lang == "ko":
+        # Korean
+        textFont = ImageFont.truetype(str(f"{ROOT_PATH}/fonts/malgun.ttf"), size=FONTSIZE)
+
+    # video->frame
+    gc.collect()
+    output_video_path = "./output.avi"
+    cap = cv2.VideoCapture(video)
+    fourcc = cv2.VideoWriter_fourcc(*"I420")  # encoder
+
+    out = cv2.VideoWriter(output_video_path, fourcc, 30.0, (int(cap.get(3)), int(cap.get(4))))
+    while cap.isOpened():
+        ret, frame = cap.read()
+        # Determine empty frame
+        if not ret:
+            break
+
+        frame2 = frame.copy()
+        results = model(frame2, size=infer_size)  # detection
+        h, w, _ = frame.shape  # frame size
+        img_size = (w, h)  # frame size
+
+        for result in results.xyxyn:
+            for i in range(len(result)):
+                id = int(i)  # instance ID
+                obj_cls_index = int(result[i][5])  # category index
+                obj_cls = model_cls_name_cp[obj_cls_index]  # category
+
+                # ------------ border coordinates ------------
+                x0 = float(result[i][:4].tolist()[0])
+                y0 = float(result[i][:4].tolist()[1])
+                x1 = float(result[i][:4].tolist()[2])
+                y1 = float(result[i][:4].tolist()[3])
+
+                # ------------ Actual coordinates of the border ------------
+                x0 = int(img_size[0] * x0)
+                y0 = int(img_size[1] * y0)
+                x1 = int(img_size[0] * x1)
+                y1 = int(img_size[1] * y1)
+
+                conf = float(result[i][4])  # confidence
+                # fps = f"{(1000 / float(results.t[1])):.2f}" # FPS
+
+                frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+                frame = pil_draw(
+                    frame,
+                    f"{id}-{obj_cls}:{conf:.2f}",
+                    textFont,
+                    [x0, y0, x1, y1],
+                    FONTSIZE,
+                    opt,
+                )
+
+                frame = cv2.cvtColor(np.asarray(frame), cv2.COLOR_RGB2BGR)

+        # frame->video
+        out.write(frame)
+    out.release()
+    cap.release()
+    # cv2.destroyAllWindows()
+
+    return output_video_path
+
+
 def main(args):
     gr.close_all()

     global model, model_cls_name_cp, cls_name

     source = args.source
+    source_video = args.source_video
     img_tool = args.img_tool
     nms_conf = args.nms_conf
     nms_iou = args.nms_iou
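Note: in the hunk above, the shell cleanup removes ./output.mp4 and the output component below is declared as mp4, yet the writer emits ./output.avi with the raw I420 codec at a hard-coded 30 fps. If an mp4 container is actually wanted, a sketch (assumes the OpenCV build ships an MPEG-4 encoder; cap.get(3)/cap.get(4) are the width/height properties):

    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))   # same as cap.get(3)
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))  # same as cap.get(4)
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0      # keep the source frame rate
    out = cv2.VideoWriter("./output.mp4", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))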
@@ -325,55 +427,86 @@ def main(args):
     usr_pwd = args.usr_pwd
     is_share = args.is_share

-    is_fonts(f"{ROOT_PATH}/fonts")
+    is_fonts(f"{ROOT_PATH}/fonts")  # Check font files

     # model loading
     model = model_loading(model_name, device)

-    model_names = yaml_csv(model_cfg, "model_names")
-    model_cls_name = yaml_csv(cls_name, "model_cls_name")
+    model_names = yaml_csv(model_cfg, "model_names")  # model names
+    model_cls_name = yaml_csv(cls_name, "model_cls_name")  # class name

-    model_cls_name_cp = model_cls_name.copy()
+    model_cls_name_cp = model_cls_name.copy()  # class name

     # ------------------- Input Components -------------------
     inputs_img = gr.Image(image_mode="RGB", source=source, tool=img_tool, type="pil", label="original image")
-
-
-
-
-
-
-
-
-
-
-
+    inputs_device01 = gr.Radio(choices=["cuda:0", "cpu"], value=device, label="device")
+    inputs_model01 = gr.Dropdown(choices=model_names, value=model_name, type="value", label="model")
+    inputs_size01 = gr.Radio(choices=[320, 640, 1280], value=inference_size, label="inference size")
+    input_conf01 = gr.Slider(0, 1, step=slider_step, value=nms_conf, label="confidence threshold")
+    inputs_iou01 = gr.Slider(0, 1, step=slider_step, value=nms_iou, label="IoU threshold")
+    inputs_maxnum01 = gr.Number(value=max_detnum, label="Maximum number of detections")
+    inputs_clsName01 = gr.CheckboxGroup(choices=model_cls_name, value=model_cls_name, type="index", label="category")
+    inputs_opt01 = gr.CheckboxGroup(choices=["label", "pdf", "json"],
+                                    value=["label", "pdf"],
+                                    type="value",
+                                    label="operate")
+
+    # ------------------- Input Components -------------------
+    inputs_video = gr.Video(format="mp4", source=source_video, label="original video")  # webcam
+    inputs_device02 = gr.Radio(choices=["cuda:0", "cpu"], value=device, label="device")
+    inputs_model02 = gr.Dropdown(choices=model_names, value=model_name, type="value", label="model")
+    inputs_size02 = gr.Radio(choices=[320, 640, 1280], value=inference_size, label="inference size")
+    input_conf02 = gr.Slider(0, 1, step=slider_step, value=nms_conf, label="confidence threshold")
+    inputs_iou02 = gr.Slider(0, 1, step=slider_step, value=nms_iou, label="IoU threshold")
+    inputs_maxnum02 = gr.Number(value=max_detnum, label="Maximum number of detections")
+    inputs_clsName02 = gr.CheckboxGroup(choices=model_cls_name, value=model_cls_name, type="index", label="category")
+    inputs_opt02 = gr.CheckboxGroup(choices=["label"], value=["label"], type="value", label="operate")

     # Input parameters
-
-        inputs_img,
-
-
-
-
-
-
-
-
+    inputs_img_list = [
+        inputs_img,  # input image
+        inputs_device01,  # device
+        inputs_model01,  # model
+        inputs_size01,  # inference size
+        input_conf01,  # confidence threshold
+        inputs_iou01,  # IoU threshold
+        inputs_maxnum01,  # maximum number of detections
+        inputs_clsName01,  # category
+        inputs_opt01,  # detect operations
+    ]
+
+    inputs_video_list = [
+        inputs_video,  # input image
+        inputs_device02,  # device
+        inputs_model02,  # model
+        inputs_size02,  # inference size
+        input_conf02,  # confidence threshold
+        inputs_iou02,  # IoU threshold
+        inputs_maxnum02,  # maximum number of detections
+        inputs_clsName02,  # category
+        inputs_opt02,  # detect operation
     ]

-    #
+    # -------------------output component-------------------
     outputs_img = gr.Image(type="pil", label="Detection image")
     outputs_json = gr.JSON(label="Detection information")
     outputs_pdf = gr.File(label="Download test report")
-    outputs_df = gr.Dataframe(max_rows=5,
+    outputs_df = gr.Dataframe(max_rows=5,
+                              overflow_row_behaviour="paginate",
+                              type="pandas",
+                              label="List of detection information")
     outputs_objSize = gr.Label(label="Object size ratio statistics")
     outputs_clsSize = gr.Label(label="Category detection proportion statistics")

-
+    # -------------------output component-------------------
+    outputs_video = gr.Video(format='mp4', label="Detection video")
+
+    # output parameters
+    outputs_img_list = [outputs_img, outputs_objSize, outputs_clsSize, outputs_json, outputs_pdf, outputs_df]
+    outputs_video_list = [outputs_video]

     # title
-    title = "Gradio YOLOv5 Det v0.
+    title = "Gradio YOLOv5 Det v0.4"

     # describe
     description = "<div align='center'>Customizable target detection model, easy to install, easy to use</div>"
@@ -423,18 +556,39 @@ def main(args):
             ["label", "pdf"],],]

     # interface
-
-        fn=
-        inputs=
-        outputs=
+    gyd_img = gr.Interface(
+        fn=yolo_det_img,
+        inputs=inputs_img_list,
+        outputs=outputs_img_list,
         title=title,
         description=description,
         # article=article,
         # examples=examples,
         # theme="seafoam",
-        #
+        # live=True, # Change output in real time
+        flagging_dir="run", # output directory
+        # allow_flagging="manual",
+        # flagging_options=["good", "generally", "bad"],
     )

+    gyd_video = gr.Interface(
+        # fn=yolo_det_video_test,
+        fn=yolo_det_video,
+        inputs=inputs_video_list,
+        outputs=outputs_video_list,
+        title=title,
+        description=description,
+        # article=article,
+        # examples=examples,
+        # theme="seafoam",
+        # live=True, # Change output in real time
+        flagging_dir="run", # output directory
+        allow_flagging="never",
+        # flagging_options=["good", "generally", "bad"],
+    )
+
+    gyd = gr.TabbedInterface(interface_list=[gyd_img, gyd_video], tab_names=["Image Mode", "Video Mode"])
+
     if not is_login:
         gyd.launch(
             inbrowser=True,  # Automatically open default browser
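Note: interface_list/tab_names is the Gradio 3.x gr.TabbedInterface signature. A self-contained sketch of the same two-tab layout (identity functions stand in for the detectors):

    import gradio as gr

    img_tab = gr.Interface(fn=lambda x: x, inputs=gr.Image(), outputs=gr.Image())
    vid_tab = gr.Interface(fn=lambda x: x, inputs=gr.Video(), outputs=gr.Video())
    demo = gr.TabbedInterface(interface_list=[img_tab, vid_tab],
                              tab_names=["Image Mode", "Video Mode"])
    demo.launch()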
@@ -458,4 +612,4 @@ def main(args):

 if __name__ == "__main__":
     args = parse_args()
-    main(args)
+    main(args)
requirements.txt CHANGED
@@ -1,17 +1,22 @@
 # Base ----------------------------------------
 matplotlib>=3.2.2
-numpy>=1.
+numpy>=1.22.3
 opencv-python-headless>=4.5.5.64
 Pillow>=7.1.2
 PyYAML>=5.3.1
 requests>=2.23.0
-scipy>=1.4.1
+scipy>=1.4.1  # Google Colab version
 torch>=1.7.0
 torchvision>=0.8.1
 tqdm>=4.41.0
+
+# Gradio YOLOv5 Det ----------------------------------------
+gradio>=3.0.3
 wget>=3.2
 rich>=12.2.0
 fpdf>=1.7.2
+plotly>=5.7.0
+bokeh>=2.4.2

 # Logging -------------------------------------
 tensorboard>=2.4.1
@@ -31,8 +36,9 @@ seaborn>=0.11.0
 # openvino-dev  # OpenVINO export

 # Extras --------------------------------------
+ipython  # interactive notebook
+psutil  # system utilization
+thop  # FLOPs computation
 # albumentations>=1.0.3
-# Cython  # for pycocotools https://github.com/cocodataset/cocoapi/issues/172
 # pycocotools>=2.0  # COCO mAP
-# roboflow
-thop  # FLOPs computation
+# roboflow
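Note: the new gradio>=3.0.3 floor matches the 3.x-only keyword arguments used in app.py (gr.Image(source=..., tool=...), gr.Video(source=...), gr.Dataframe(max_rows=..., overflow_row_behaviour=...)); those arguments were removed in Gradio 4, so treat the pin as an effective 3.x range. To reproduce the environment:

    pip install -r requirements.txt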