ming committed on
Commit
fc9914e
·
1 Parent(s): 9884884

Update v2 API and HF streaming summarizer improvements

Browse files
app/api/v2/summarize.py CHANGED
@@ -20,6 +20,7 @@ async def summarize_stream(payload: SummarizeRequest):
20
  headers={
21
  "Cache-Control": "no-cache",
22
  "Connection": "keep-alive",
 
23
  }
24
  )
25
 
 
20
  headers={
21
  "Cache-Control": "no-cache",
22
  "Connection": "keep-alive",
23
+ "X-Accel-Buffering": "no",
24
  }
25
  )
26
 
app/services/hf_streaming_summarizer.py CHANGED
@@ -315,7 +315,7 @@ class HFStreamingSummarizer:
315
  **inputs,
316
  "streamer": streamer,
317
  "max_new_tokens": max_new_tokens,
318
- "do_sample": True,
319
  "temperature": temperature,
320
  "top_p": top_p,
321
  "pad_token_id": pad_id,
@@ -547,7 +547,7 @@ class HFStreamingSummarizer:
547
  **inputs,
548
  "streamer": streamer,
549
  "max_new_tokens": max_new_tokens,
550
- "do_sample": True,
551
  "temperature": temperature,
552
  "top_p": top_p,
553
  "pad_token_id": pad_id,
 
315
  **inputs,
316
  "streamer": streamer,
317
  "max_new_tokens": max_new_tokens,
318
+ "do_sample": False,
319
  "temperature": temperature,
320
  "top_p": top_p,
321
  "pad_token_id": pad_id,
 
547
  **inputs,
548
  "streamer": streamer,
549
  "max_new_tokens": max_new_tokens,
550
+ "do_sample": False,
551
  "temperature": temperature,
552
  "top_p": top_p,
553
  "pad_token_id": pad_id,