omer15699 committed on
Commit
a54bd7b
verified
1 Parent(s): 450b693

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -4
app.py CHANGED
@@ -34,25 +34,23 @@ def _l2norm(x: np.ndarray) -> np.ndarray:
34
  x = x.reshape(1, -1)
35
  return x / (np.linalg.norm(x, axis=1, keepdims=True) + 1e-12)
36
 
 
37
  # -------- Load sample data once (FAST: only a slice) --------
38
  @functools.lru_cache(maxsize=1)
39
  def load_sample_df():
40
 
41
  import pandas as pd
42
 
43
- # 1) נסיון ראשון: sentiment140 קטן מראש
44
  try:
45
  from datasets import load_dataset
46
  ds = load_dataset("sentiment140", split=f"train[:{SAMPLE_SIZE}]")
47
  df = ds.to_pandas()
48
  except Exception:
49
- # 2) נסיון שני: tweet_eval (עוד יותר קטן/אמין)
50
  try:
51
  from datasets import load_dataset
52
  ds = load_dataset("tweet_eval", "sentiment", split=f"train[:{SAMPLE_SIZE}]")
53
  df = ds.to_pandas().rename(columns={"text": "text"})
54
  except Exception:
55
- # 3) פולבאק אחרון: רשימת טקסטים קטנה מקומית (כדי שה-UI ירוץ בכל מקרה)
56
  fallback_texts = [
57
  "I love this product!", "This is terrible...", "Best purchase ever",
58
  "Pretty good overall", "I am not happy with the service",
@@ -61,7 +59,6 @@ def load_sample_df():
61
  ]
62
  return pd.DataFrame({"text": fallback_texts, "clean_text": fallback_texts})
63
 
64
- # ניקוי קל ושימור רק טקסט
65
  df = df.dropna(subset=["text"]).copy()
66
  df["text_length"] = df["text"].astype(str).str.len()
67
  df = df[(df["text_length"] >= 5) & (df["text_length"] <= 280)].copy()
 
34
  x = x.reshape(1, -1)
35
  return x / (np.linalg.norm(x, axis=1, keepdims=True) + 1e-12)
36
 
37
+
38
  # -------- Load sample data once (FAST: only a slice) --------
39
  @functools.lru_cache(maxsize=1)
40
  def load_sample_df():
41
 
42
  import pandas as pd
43
 
 
44
  try:
45
  from datasets import load_dataset
46
  ds = load_dataset("sentiment140", split=f"train[:{SAMPLE_SIZE}]")
47
  df = ds.to_pandas()
48
  except Exception:
 
49
  try:
50
  from datasets import load_dataset
51
  ds = load_dataset("tweet_eval", "sentiment", split=f"train[:{SAMPLE_SIZE}]")
52
  df = ds.to_pandas().rename(columns={"text": "text"})
53
  except Exception:
 
54
  fallback_texts = [
55
  "I love this product!", "This is terrible...", "Best purchase ever",
56
  "Pretty good overall", "I am not happy with the service",
 
59
  ]
60
  return pd.DataFrame({"text": fallback_texts, "clean_text": fallback_texts})
61
 
 
62
  df = df.dropna(subset=["text"]).copy()
63
  df["text_length"] = df["text"].astype(str).str.len()
64
  df = df[(df["text_length"] >= 5) & (df["text_length"] <= 280)].copy()