Spaces:
Runtime error
Runtime error
| import pyterrier as pt | |
| pt.init() | |
| import numpy as np | |
| import pandas as pd | |
| import gradio as gr | |
| from pyterrier_doc2query import Doc2Query, QueryScorer, QueryFilter | |
| from pyterrier_dr import ElectraScorer | |
| from pyterrier_gradio import Demo, MarkdownFile, interface, df2code, code2md, EX_D | |
# Hugging Face model identifiers: the query generator and the query scorer.
MODEL = 'macavaney/doc2query-t5-base-msmarco'
SCORE_MODEL = 'crystina-z/monoELECTRA_LCE_nneg31'

# 19 ascending score thresholds. generate_vis() locates a query score among
# them with np.searchsorted and reports `bucket * 5` as its percentile, so the
# thresholds split scores into 20 buckets (0th, 5th, ..., 95th).
# NOTE(review): presumably measured from SCORE_MODEL outputs - confirm before
# swapping the scoring model.
PERCENTILES_BY_5 = np.array([
    -3.80468750e+00, -2.21679688e+00, -1.25683594e+00, -5.58105469e-01,
    -7.65323639e-04, 4.69482422e-01, 8.83300781e-01, 1.25878906e+00,
    1.61035156e+00, 1.94335938e+00, 2.26562500e+00, 2.58007812e+00,
    2.89648438e+00, 3.21484375e+00, 3.54687500e+00, 3.90039062e+00,
    4.30078125e+00, 4.77343750e+00, 5.37109375e+00,
])

# One background colour per bucket (len(PERCENTILES_BY_5) + 1 entries), running
# from red for the lowest scores through white to green for the highest.
# Channel values are kept inside the valid 0-255 range.
COLORS = [
    'rgb(252, 132, 100)',
    'rgb(252, 148, 116)',
    'rgb(252, 166, 137)',
    'rgb(252, 183, 156)',
    'rgb(253, 200, 178)',
    'rgb(254, 215, 198)',
    'rgb(255, 228, 216)',
    'rgb(255, 237, 228)',
    'rgb(255, 245, 240)',
    'rgb(255, 255, 255)',
    'rgb(247, 252, 245)',
    'rgb(240, 250, 237)',
    'rgb(233, 247, 228)',
    'rgb(222, 242, 216)',
    'rgb(209, 237, 203)',
    'rgb(195, 232, 188)',
    'rgb(180, 225, 173)',
    'rgb(163, 218, 157)',
    'rgb(145, 210, 142)',
    'rgb(125, 201, 126)',
]
# Shared model instances, created once at import time and reused by every
# request. The predict callbacks overwrite `append` / `num_samples` on
# `doc2query` before each run.
doc2query = Doc2Query(MODEL, append=True, num_samples=5)
# Name the scorer checkpoint explicitly so the model that actually runs matches
# the `ElectraScorer(<score_model>)` snippet predict_mm() shows to users and
# the SCORE_MODEL value it asserts against.
electra = ElectraScorer(SCORE_MODEL)
query_scorer = QueryScorer(electra)
# Notebook file name handed to code2md() together with each generated snippet.
COLAB_NAME = 'pyterrier_doc2query.ipynb'

# Install commands handed to code2md() for the plain Doc2Query demo.
COLAB_INSTALL = '\n'.join((
    '!pip install -q git+https://github.com/terrier-org/pyterrier',
    '!pip install -q git+https://github.com/terrierteam/pyterrier_doc2query',
))

# Same commands plus the extra packages installed for the query-scoring demo.
COLAB_INSTALL_MM = '\n'.join((
    COLAB_INSTALL,
    '!pip install -q git+https://github.com/terrierteam/pyterrier_dr faiss-cpu',
))
def predict(input, model, append, num_samples):
    """Run Doc2Query on the demo input and build the three demo outputs.

    Args:
        input: Documents to expand; passed unchanged to ``doc2query`` and to
            ``df2code`` (presumably a DataFrame with ``docno``/``text``
            columns, as read by ``generate_vis`` - confirm against the caller).
        model: Selected model name; must equal ``MODEL``.
        append: Value assigned to ``doc2query.append`` for this run.
        num_samples: Value assigned to ``doc2query.num_samples`` for this run.

    Returns:
        A tuple ``(result, markdown, html)``: the frame returned by
        ``doc2query``, the output of ``code2md`` for the equivalent code
        snippet, and the HTML produced by ``generate_vis`` for that same frame.

    Raises:
        AssertionError: If ``model`` is not ``MODEL``.
    """
    assert model == MODEL
    # `doc2query` is a shared module-level instance; reconfigure it per request.
    doc2query.append = append
    doc2query.num_samples = num_samples
    code = f'''import pandas as pd
from pyterrier_doc2query import Doc2Query
doc2query = Doc2Query({repr(model)}, append={append}, num_samples={num_samples})
doc2query({df2code(input)})
'''
    # Run the model exactly once and reuse the result for both the table and
    # the visualisation. Calling it a second time repeats the generation work
    # and lets the returned table come from a different run than the
    # visualisation that is displayed next to it.
    res = doc2query(input)
    vis = generate_vis(res)
    return (res, code2md(code, COLAB_INSTALL, COLAB_NAME), vis)
def generate_vis(df):
    """Render every row of ``df`` as an HTML card and return the joined markup.

    Each row must provide ``docno`` and ``text``. When a ``querygen`` column is
    present its newline-separated queries are listed under the document text;
    when ``querygen_score`` is also present, each query is paired with its
    score and prefixed with a coloured percentile badge.

    All values taken from the frame (docno, text, queries) are HTML-escaped
    before interpolation, so markup typed into the demo is displayed literally
    instead of being injected into the page.

    Args:
        df: Frame to render (iterated with ``itertuples``).

    Returns:
        The per-document HTML fragments joined with newlines; an empty string
        for an empty frame.
    """
    import html  # local import keeps this block self-contained

    result = []
    for row in df.itertuples(index=False):
        qs = []
        if hasattr(row, 'querygen_score'):
            for q, score in zip(row.querygen.split('\n'), row.querygen_score):
                # Index of the first threshold >= score; 0..len(PERCENTILES_BY_5),
                # which is why COLORS has one more entry than there are thresholds.
                bucket = np.searchsorted(PERCENTILES_BY_5, score)
                color = COLORS[bucket]
                percentile = bucket * 5
                qs.append(f'''
<div>
<span title="score={score:.4f}, in the {percentile}th percentile of scores" style="border: 1px solid #888; border-radius: 3px; font-size: 0.6em; font-family: monospace; background-color: {color}; padding: 1px 3px;">{percentile}th</span> {html.escape(q)}
</div>
''')
        elif hasattr(row, 'querygen'):
            for q in row.querygen.split('\n'):
                qs.append(f'''
<div>{html.escape(q)}</div>
''')
        qs = '\n'.join(qs)
        if qs:
            qs = f'''
<div><strong>Expansion Queries:</strong></div>
{qs}
'''
        # Escape first, then turn line breaks into <br/> so the tags we add
        # ourselves are not escaped.
        text = html.escape(row.text).replace('\n', '<br/>')
        docno = html.escape(str(row.docno))
        result.append(f'''
<div style="font-size: 1.2em;">Document: <strong>{docno}</strong></div>
<div style="margin: 4px 0 16px; padding: 4px; border: 1px solid black;">
<div>
{text}
</div>
{qs}
</div>
''')
    return '\n'.join(result)
def predict_mm(input, model, num_samples, score_model):
    """Generate queries for the demo input, score them, and build the outputs.

    Args:
        input: Documents to expand; passed unchanged to the pipeline and to
            ``df2code``.
        model: Selected generator name; must equal ``MODEL``.
        num_samples: Value assigned to ``doc2query.num_samples`` for this run.
        score_model: Selected scorer name; must equal ``SCORE_MODEL``.

    Returns:
        A tuple ``(result, markdown, html)``: the pipeline output with its
        ``querygen_score`` column rewritten as ``"[ a, b, ... ]"`` strings,
        the output of ``code2md`` for the equivalent code snippet, and the
        HTML produced by ``generate_vis``.

    Raises:
        AssertionError: If ``model`` or ``score_model`` is not the supported one.
    """
    assert model == MODEL
    assert score_model == SCORE_MODEL

    def _scores_as_text(scores):
        # Flatten one row's scores into a bracketed, comma-separated string.
        return f"[ {', '.join(map(str, scores))} ]"

    # Scoring needs the queries in their own column, so appending is disabled
    # on the shared generator for this run.
    doc2query.append = False
    doc2query.num_samples = num_samples
    scoring_pipeline = doc2query >> query_scorer

    snippet = f'''import pyterrier as pt ; pt.init()
import pandas as pd
from pyterrier_doc2query import Doc2Query, QueryScorer
from pyterrier_dr import ElectraScorer
doc2query = Doc2Query({repr(model)}, append=False, num_samples={num_samples})
scorer = ElectraScorer({repr(score_model)})
pipeline = doc2query >> QueryScorer(scorer)
pipeline({df2code(input)})
'''

    scored = scoring_pipeline(input)
    # Build the visualisation first: it reads the raw per-query scores, which
    # the next statement replaces with display strings.
    html_view = generate_vis(scored)
    scored['querygen_score'] = scored['querygen_score'].apply(_scores_as_text)
    notebook_md = code2md(snippet, COLAB_INSTALL_MM, COLAB_NAME)
    return (scored, notebook_md, html_view)
def _model_dropdown():
    """Build the non-interactive dropdown showing the single generator model."""
    return gr.Dropdown(
        choices=[MODEL],
        value=MODEL,
        label='Model',
        interactive=False,
    )


def _num_queries_slider():
    """Build the 1-10 slider for the query count, seeded from ``doc2query``."""
    return gr.Slider(
        minimum=1,
        maximum=10,
        value=doc2query.num_samples,
        step=1.,
        label='# Queries'
    )


def _append_checkbox():
    """Build the "Append" checkbox, seeded from ``doc2query.append``."""
    return gr.Checkbox(
        value=doc2query.append,
        label="Append",
    )


def _scorer_dropdown():
    """Build the non-interactive dropdown showing the single scoring model."""
    return gr.Dropdown(
        choices=[SCORE_MODEL],
        value=SCORE_MODEL,
        label='Filter',
        interactive=False,
    )


# Assemble the page: markdown sections interleaved with the two demos, then
# launch it. Widgets are listed in the positional order of each callback's
# parameters after `input`. The factories are called inline so that objects
# are constructed in the same order as the page is declared.
interface(
    MarkdownFile('README.md'),
    Demo(
        predict,
        EX_D,
        [_model_dropdown(), _append_checkbox(), _num_queries_slider()],
    ),
    MarkdownFile('mm.md'),
    Demo(
        predict_mm,
        EX_D,
        [_model_dropdown(), _num_queries_slider(), _scorer_dropdown()],
    ),
    MarkdownFile('wrapup.md'),
).launch(share=False)