Spaces:

Detomo
/

aisatsu-api

Sleeping

App Files Files Community

vumichien committed on Mar 25, 2023

Commit

b7f8699

1 Parent(s): 553c308

Create main.py

Browse files

Files changed (1) hide show

main.py +58 -0

main.py ADDED Viewed

	@@ -0,0 +1,58 @@

+from ultralyticsplus import YOLO
+from base64 import b64encode
+from speech_recognition import AudioFile, Recognizer
+import numpy as np
+from scipy.spatial import distance as dist
+from sahi.utils.cv import read_image_as_pil
+from fastapi import FastAPI, File, UploadFile, Form
+from utils import tts, read_image_file, pil_to_base64, base64_to_pil, get_hist
+from typing import Optional
+model = YOLO('ultralyticsplus/yolov8s')
+CLASS = model.model.names
+app = FastAPI()
+defaul_bot_voice = "おはいようございます"
+area_thres = 0.3
+@app.get("/")
+def read_root():
+    return {"Message": "Application startup complete"}
+@app.post("/aisatsu_api/")
+async def predict_api(
+        file: UploadFile = File(...),
+        last_seen: Optional[str] = Form(None)
+):
+    image = read_image_file(await file.read())
+    results = model.predict(image, show=False)[0]
+    image = read_image_as_pil(image)
+    masks, boxes = results.masks, results.boxes
+    area_image = image.width * image.height
+    voice_bot = None
+    most_close = 0
+    out_img = None
+    diff_value = 0.5
+    if boxes is not None:
+        for xyxy, conf, cls in zip(boxes.xyxy, boxes.conf, boxes.cls):
+            if int(cls) != 0:
+                continue
+            box = xyxy.tolist()
+            area_rate = (box[2] - box[0]) * (box[3] - box[1]) / area_image
+            if area_rate >= most_close:
+                out_img = image.crop(tuple(box)).resize((128, 128))
+                most_close = area_rate
+    if last_seen is not None:
+        last_seen = base64_to_pil(last_seen)
+        if out_img is not None:
+            diff_value = dist.euclidean(get_hist(out_img), get_hist(last_seen))
+    print(most_close, diff_value)
+    if most_close >= area_thres and diff_value >= 0.5:
+        voice_bot = tts(defaul_bot_voice, language="ja")
+    return {
+        "voice": voice_bot,
+        "image": pil_to_base64(out_img) if out_img is not None else None
+    }