from keras.preprocessing.sequence import pad_sequences
import numpy as np
import re
# import tensorflow as tf
import os
import requests
from keras.models import load_model

# Authenticate against the Hugging Face Inference API with the HF_Token secret.
headers = {"Authorization": f"Bearer {os.environ['HF_Token']}"}

# Load the pre-trained classifier shipped with the Space.
model = load_model("./model.keras")


def query_embeddings(texts):
    """Fetch sentence embeddings from the Hugging Face Inference API."""
    payload = {"inputs": texts, "options": {"wait_for_model": True}}
    model_id = "sentence-transformers/sentence-t5-base"
    API_URL = (
        f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_id}"
    )
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()
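
# Note (assumption, not part of the original code): for sentence-transformers
# models the feature-extraction pipeline typically returns one embedding vector
# per input sentence, e.g. query_embeddings(["a", "b"]) -> two lists of floats.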


def preprocess(sentences):
    """Embed the sentences and pad/truncate them to the fixed model input length."""
    max_len = 1682
    embeddings = query_embeddings(sentences)
    # Truncate documents longer than the model's maximum sequence length.
    if len(sentences) > max_len:
        X = embeddings[:max_len]
    else:
        X = embeddings
    # Pad the embedding sequence to max_len and add a batch dimension.
    X_padded = pad_sequences([X], maxlen=max_len, dtype="float32", padding="post")
    return X_padded
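
# Assuming 768-dimensional sentence-t5-base embeddings, preprocess() is expected
# to return an array of shape (1, 1682, 768), with trailing rows zero-padded.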


def predict_from_document(sentences):
    """Return per-sentence binary labels and raw scores for one document."""
    preprop = preprocess(sentences)
    prediction = model.predict(preprop)
    # Dynamic threshold: use 0.5 when the mean score is low; otherwise require a
    # score at least 20% above the document's mean prediction.
    if np.mean(prediction) < 0.5:
        output = (prediction.flatten()[: len(sentences)] >= 0.5).astype(int)
    else:
        output = (
            prediction.flatten()[: len(sentences)]
            >= np.mean(prediction) * 1.20  # + np.std(prediction)
        ).astype(int)
    return output, prediction.flatten()[: len(sentences)]
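

# Example usage (a minimal sketch; the sentences below are hypothetical and the
# script assumes HF_Token is set and ./model.keras exists):
if __name__ == "__main__":
    sample_sentences = [
        "This is the first sentence of the document.",
        "This is the second sentence of the document.",
    ]
    labels, scores = predict_from_document(sample_sentences)
    print(labels, scores)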