Jawad committed on
Commit
c359068
·
1 Parent(s): d036d71

Handle NaN in decision id

Browse files
Files changed (2) hide show
  1. score_app.py +4 -2
  2. stream_app.py +10 -1
score_app.py CHANGED
@@ -2,6 +2,7 @@
2
  import streamlit as st
3
  import requests
4
  import pandas as pd
 
5
  import datetime
6
  from data import headers
7
 
@@ -18,7 +19,7 @@ created_at = st.sidebar.date_input('Date input', value=datetime.date(2021, 1, 1)
18
  @st.cache
19
  def load_data(source_type, start_date):
20
  def get_decision_hist(d_id):
21
- url = f"https://www.theolex.io/data/decisions/{d_id}/return_hist/"
22
  res = requests.get(url, headers=headers)
23
  return res.json()
24
 
@@ -34,7 +35,8 @@ def load_data(source_type, start_date):
34
  data_sources = data_sources[data_sources.created_at >= start_date]
35
 
36
  # get decisions history
37
- data_list = [(_id, get_decision_hist(_id)) for _id in data_sources['decision_id']]
 
38
  return [(_id, pd.DataFrame(pd.DataFrame(data).fields.to_dict()).T)
39
  for _id, data in data_list if len(data) > 0]
40
 
 
2
  import streamlit as st
3
  import requests
4
  import pandas as pd
5
+ import numpy as np
6
  import datetime
7
  from data import headers
8
 
 
19
  @st.cache
20
  def load_data(source_type, start_date):
21
  def get_decision_hist(d_id):
22
+ url = f"https://www.theolex.io/data/decisions/{int(d_id)}/return_hist/"
23
  res = requests.get(url, headers=headers)
24
  return res.json()
25
 
 
35
  data_sources = data_sources[data_sources.created_at >= start_date]
36
 
37
  # get decisions history
38
+ # could be optimized by first filtering on validated decisions for the decision table
39
+ data_list = [(_id, get_decision_hist(_id)) for _id in data_sources['decision_id'] if not np.isnan(_id)]
40
  return [(_id, pd.DataFrame(pd.DataFrame(data).fields.to_dict()).T)
41
  for _id, data in data_list if len(data) > 0]
42
 
stream_app.py CHANGED
@@ -1,6 +1,7 @@
1
  # -*- coding: utf-8 -*-
2
  import pandas as pd
3
  import streamlit as st
 
4
 
5
  import plotly.express as px
6
  import plotly.figure_factory as ff
@@ -205,7 +206,7 @@ if st.button('Run training'):
205
  test_bias = np.mean(test_errors)
206
  st.metric(label="Test bias", value=test_bias)
207
 
208
- fig = ff.create_distplot([test_errors], ['errors distribution'], bin_size=0.1)
209
  fig.update_layout(width=1000,
210
  template="simple_white",
211
  height=600,
@@ -260,6 +261,14 @@ if st.button('Run training'):
260
  R_sq = corr_matrix[0, 1] ** 2
261
  st.metric(label="Explained variation thanks to model (R^2)", value=f"{round(100 * R_sq, 2)}%")
262
 
 
 
 
 
 
 
 
 
263
  st.sidebar.title("Organizations view")
264
  col_x = ['log10_org_revenues', 'authorities_country', 'violation_theme', 'org_country', 'org_company_type']
265
  sample_revenues = st.sidebar.number_input('Yearly revenues', value=1000000)
 
1
  # -*- coding: utf-8 -*-
2
  import pandas as pd
3
  import streamlit as st
4
+ from scipy import stats
5
 
6
  import plotly.express as px
7
  import plotly.figure_factory as ff
 
206
  test_bias = np.mean(test_errors)
207
  st.metric(label="Test bias", value=test_bias)
208
 
209
+ fig = ff.create_distplot([test_errors], ['errors distribution'], bin_size=0.2)
210
  fig.update_layout(width=1000,
211
  template="simple_white",
212
  height=600,
 
261
  R_sq = corr_matrix[0, 1] ** 2
262
  st.metric(label="Explained variation thanks to model (R^2)", value=f"{round(100 * R_sq, 2)}%")
263
 
264
+
265
+ st.subheader("Plot predicted vs real")
266
+ #st.metric(label="Explained variation thanks to model (R^2)", value=f"{round(100 * R_sq, 2)}%")
267
+
268
+ print(stats.pearsonr(test_errors, target_test_predicted))
269
+
270
+
271
+
272
  st.sidebar.title("Organizations view")
273
  col_x = ['log10_org_revenues', 'authorities_country', 'violation_theme', 'org_country', 'org_company_type']
274
  sample_revenues = st.sidebar.number_input('Yearly revenues', value=1000000)