Update app.py
Browse files
app.py
CHANGED
|
@@ -34,25 +34,23 @@ def _l2norm(x: np.ndarray) -> np.ndarray:
|
|
| 34 |
x = x.reshape(1, -1)
|
| 35 |
return x / (np.linalg.norm(x, axis=1, keepdims=True) + 1e-12)
|
| 36 |
|
|
|
|
| 37 |
# -------- Load sample data once (FAST: only a slice) --------
|
| 38 |
@functools.lru_cache(maxsize=1)
|
| 39 |
def load_sample_df():
|
| 40 |
|
| 41 |
import pandas as pd
|
| 42 |
|
| 43 |
-
# 1) First attempt: sentiment140, kept small from the start
|
| 44 |
try:
|
| 45 |
from datasets import load_dataset
|
| 46 |
ds = load_dataset("sentiment140", split=f"train[:{SAMPLE_SIZE}]")
|
| 47 |
df = ds.to_pandas()
|
| 48 |
except Exception:
|
| 49 |
-
# 2) Second attempt: tweet_eval (even smaller/more reliable)
|
| 50 |
try:
|
| 51 |
from datasets import load_dataset
|
| 52 |
ds = load_dataset("tweet_eval", "sentiment", split=f"train[:{SAMPLE_SIZE}]")
|
| 53 |
df = ds.to_pandas().rename(columns={"text": "text"})
|
| 54 |
except Exception:
|
| 55 |
-
# 3) Last fallback: a small local list of texts (so that the UI runs in any case)
|
| 56 |
fallback_texts = [
|
| 57 |
"I love this product!", "This is terrible...", "Best purchase ever",
|
| 58 |
"Pretty good overall", "I am not happy with the service",
|
|
@@ -61,7 +59,6 @@ def load_sample_df():
|
|
| 61 |
]
|
| 62 |
return pd.DataFrame({"text": fallback_texts, "clean_text": fallback_texts})
|
| 63 |
|
| 64 |
-
# Light cleaning, keeping only the text
|
| 65 |
df = df.dropna(subset=["text"]).copy()
|
| 66 |
df["text_length"] = df["text"].astype(str).str.len()
|
| 67 |
df = df[(df["text_length"] >= 5) & (df["text_length"] <= 280)].copy()
|
|
|
|
| 34 |
x = x.reshape(1, -1)
|
| 35 |
return x / (np.linalg.norm(x, axis=1, keepdims=True) + 1e-12)
|
| 36 |
|
| 37 |
+
|
| 38 |
# -------- Load sample data once (FAST: only a slice) --------
|
| 39 |
@functools.lru_cache(maxsize=1)
|
| 40 |
def load_sample_df():
|
| 41 |
|
| 42 |
import pandas as pd
|
| 43 |
|
|
|
|
| 44 |
try:
|
| 45 |
from datasets import load_dataset
|
| 46 |
ds = load_dataset("sentiment140", split=f"train[:{SAMPLE_SIZE}]")
|
| 47 |
df = ds.to_pandas()
|
| 48 |
except Exception:
|
|
|
|
| 49 |
try:
|
| 50 |
from datasets import load_dataset
|
| 51 |
ds = load_dataset("tweet_eval", "sentiment", split=f"train[:{SAMPLE_SIZE}]")
|
| 52 |
df = ds.to_pandas().rename(columns={"text": "text"})
|
| 53 |
except Exception:
|
|
|
|
| 54 |
fallback_texts = [
|
| 55 |
"I love this product!", "This is terrible...", "Best purchase ever",
|
| 56 |
"Pretty good overall", "I am not happy with the service",
|
|
|
|
| 59 |
]
|
| 60 |
return pd.DataFrame({"text": fallback_texts, "clean_text": fallback_texts})
|
| 61 |
|
|
|
|
| 62 |
df = df.dropna(subset=["text"]).copy()
|
| 63 |
df["text_length"] = df["text"].astype(str).str.len()
|
| 64 |
df = df[(df["text_length"] >= 5) & (df["text_length"] <= 280)].copy()
|