Spaces:

Theolex
/

theolex_streamlit

Runtime error

Jawad committed on Nov 24, 2021

Commit

c359068

1 Parent(s): d036d71

manage NaN in decision id

Files changed (2) hide show

score_app.py CHANGED Viewed

@@ -2,6 +2,7 @@
 import streamlit as st
 import requests
 import pandas as pd
 import datetime
 from data import headers
@@ -18,7 +19,7 @@ created_at = st.sidebar.date_input('Date input', value=datetime.date(2021, 1, 1)
 @st.cache
 def load_data(source_type, start_date):
     def get_decision_hist(d_id):
-        url = f"https://www.theolex.io/data/decisions/{d_id}/return_hist/"
         res = requests.get(url, headers=headers)
         return res.json()
@@ -34,7 +35,8 @@ def load_data(source_type, start_date):
     data_sources = data_sources[data_sources.created_at >= start_date]
     # get decisions history
-    data_list = [(_id, get_decision_hist(_id)) for _id in data_sources['decision_id']]
     return [(_id, pd.DataFrame(pd.DataFrame(data).fields.to_dict()).T)
             for _id, data in data_list if len(data) > 0]

 import streamlit as st
 import requests
 import pandas as pd
+import numpy as np
 import datetime
 from data import headers
 @st.cache
 def load_data(source_type, start_date):
     def get_decision_hist(d_id):
+        url = f"https://www.theolex.io/data/decisions/{int(d_id)}/return_hist/"
         res = requests.get(url, headers=headers)
         return res.json()
     data_sources = data_sources[data_sources.created_at >= start_date]
     # get decisions history
+    # can be optimized by filtering first on validated decision for decision table
+    data_list = [(_id, get_decision_hist(_id)) for _id in data_sources['decision_id'] if not np.isnan(_id)]
     return [(_id, pd.DataFrame(pd.DataFrame(data).fields.to_dict()).T)
             for _id, data in data_list if len(data) > 0]

stream_app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 import pandas as pd
 import streamlit as st
 import plotly.express as px
 import plotly.figure_factory as ff
@@ -205,7 +206,7 @@ if st.button('Run training'):
         test_bias = np.mean(test_errors)
         st.metric(label="Test bias", value=test_bias)
-        fig = ff.create_distplot([test_errors], ['errors distribution'], bin_size=0.1)
         fig.update_layout(width=1000,
                           template="simple_white",
                           height=600,
@@ -260,6 +261,14 @@ if st.button('Run training'):
         R_sq = corr_matrix[0, 1] ** 2
         st.metric(label="Explained variation thanks to model (R^2)", value=f"{round(100 * R_sq, 2)}%")
 st.sidebar.title("Organizations view")
 col_x = ['log10_org_revenues', 'authorities_country', 'violation_theme', 'org_country', 'org_company_type']
 sample_revenues = st.sidebar.number_input('Yearly revenues', value=1000000)

 # -*- coding: utf-8 -*-
 import pandas as pd
 import streamlit as st
+from scipy import stats
 import plotly.express as px
 import plotly.figure_factory as ff
         test_bias = np.mean(test_errors)
         st.metric(label="Test bias", value=test_bias)
+        fig = ff.create_distplot([test_errors], ['errors distribution'], bin_size=0.2)
         fig.update_layout(width=1000,
                           template="simple_white",
                           height=600,
         R_sq = corr_matrix[0, 1] ** 2
         st.metric(label="Explained variation thanks to model (R^2)", value=f"{round(100 * R_sq, 2)}%")
+        st.subheader("Plot predicted vs real")
+        #st.metric(label="Explained variation thanks to model (R^2)", value=f"{round(100 * R_sq, 2)}%")
+        print(stats.pearsonr(test_errors, target_test_predicted))
 st.sidebar.title("Organizations view")
 col_x = ['log10_org_revenues', 'authorities_country', 'violation_theme', 'org_country', 'org_company_type']
 sample_revenues = st.sidebar.number_input('Yearly revenues', value=1000000)