feat: update to ecologits 0.5.1 + remove own usage to fix it
Files changed:

- app.py (+103 −113)
- requirements-dev.txt (+1 −1)
- requirements.txt (+1 −1)
- src/scrapper.py (+33 −0)
- src/utils.py (+20 −7)
app.py
CHANGED

```diff
@@ -1,13 +1,7 @@
 import gradio as gr
 
-import requests
-from bs4 import BeautifulSoup
-
-import tiktoken
-
-from ecologits.tracers.utils import compute_llm_impacts, _avg
+from ecologits.tracers.utils import llm_impacts, _avg
 from ecologits.impacts.llm import compute_llm_impacts as compute_llm_impacts_expert
-from ecologits.impacts.llm import IF_ELECTRICITY_MIX_GWP, IF_ELECTRICITY_MIX_ADPE, IF_ELECTRICITY_MIX_PE
 from ecologits.model_repository import models
 
 from src.assets import custom_css
```
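The heart of this upgrade is the rename of the high-level tracer helper: `compute_llm_impacts` from `ecologits.tracers.utils` becomes `llm_impacts` in 0.5.x. A minimal sketch of the new call, using the same keyword arguments this commit passes further down (the concrete values here are illustrative only):

```python
from ecologits.tracers.utils import llm_impacts

# Illustrative values; in app.py these come from the UI widgets.
impacts = llm_impacts(
    provider="openai",
    model_name="gpt-4o",
    output_token_count=250,
    request_latency=100000,  # large constant, as used later in this commit
)
# The result exposes the four impact criteria used throughout the app.
print(impacts.energy, impacts.gwp, impacts.adpe, impacts.pe)
```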
```diff
@@ -36,15 +30,17 @@ from src.utils import (
     format_energy_eq_physical_activity,
     PhysicalActivity,
     format_energy_eq_electric_vehicle,
-    format_gwp_eq_streaming,
-
+    format_gwp_eq_streaming,
+    format_energy_eq_electricity_production,
+    EnergyProduction,
+    format_gwp_eq_airplane_paris_nyc,
+    format_energy_eq_electricity_consumption_ireland,
     df_elec_mix_for_plot
 )
+from src.scrapper import process_input
 
 CUSTOM = "Custom"
 
-tokenizer = tiktoken.get_encoding('cl100k_base')
-
 def model_list(provider: str) -> gr.Dropdown:
     if provider == "openai":
         return gr.Dropdown(
```
```diff
@@ -86,28 +82,37 @@ def model_list(provider: str) -> gr.Dropdown:
 def custom():
     return CUSTOM
 
-def tiktoken_len(text):
-    tokens = tokenizer.encode(
-        text,
-        disallowed_special=()
-    )
-    return len(tokens)
-
 def model_active_params_fn(model_name: str, n_param: float):
     if model_name == CUSTOM:
         return n_param
     provider, model_name = model_name.split('/', 1)
     model = models.find_model(provider=provider, model_name=model_name)
-    …
-    …
+    try:  # MoE with range
+        return model.architecture.parameters.active.max
+    except:
+        try:  # MoE without range
+            return model.architecture.parameters.active
+        except:
+            try:  # dense with range
+                return model.architecture.parameters.max
+            except:  # dense without range
+                return model.architecture.parameters
 
 def model_total_params_fn(model_name: str, n_param: float):
     if model_name == CUSTOM:
         return n_param
     provider, model_name = model_name.split('/', 1)
     model = models.find_model(provider=provider, model_name=model_name)
-    …
-    …
+    try:  # MoE
+        return model.architecture.parameters.total.max
+    except:
+        try:  # dense with range
+            return model.architecture.parameters.max
+        except:  # dense without range
+            try:
+                return model.architecture.parameters.total
+            except:
+                return model.architecture.parameters
 
 def mix_fn(country_code: str, mix_adpe: float, mix_pe: float, mix_gwp: float):
     if country_code == CUSTOM:
```
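Both parameter lookups walk the new `model.architecture.parameters` schema (MoE vs. dense, with or without a min/max range) by cascading bare `except:` blocks, which also swallows unrelated failures such as `find_model` returning `None`. A sketch of the same precedence written as an explicit fallback; `_first_attr` is a hypothetical helper, not an EcoLogits API:

```python
def _first_attr(obj, paths):
    """Return the value of the first dotted attribute path that resolves.

    Hypothetical helper mirroring the try/except cascade above, but only
    AttributeError is treated as "this parameter shape doesn't apply".
    """
    for path in paths:
        value = obj
        try:
            for name in path.split('.'):
                value = getattr(value, name)
        except AttributeError:
            continue
        return value
    return None

def model_active_params(model):
    # Same precedence as the commit: MoE with range, MoE, dense with range, dense.
    params = model.architecture.parameters
    value = _first_attr(params, ['active.max', 'active', 'max'])
    return value if value is not None else params
```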
```diff
@@ -148,7 +153,7 @@ with gr.Blocks(css=custom_css) as demo:
         if provider.startswith("huggingface_hub"):
             provider = provider.split("/")[0]
         if models.find_model(provider, model) is not None:
-            impacts = compute_llm_impacts(
+            impacts = llm_impacts(
                 provider=provider,
                 model_name=model,
                 output_token_count=prompt,
```
```diff
@@ -316,17 +321,17 @@ with gr.Blocks(css=custom_css) as demo:
             )
             input_mix_gwp = gr.Number(
                 label="Electricity mix - GHG emissions [kgCO2eq / kWh]",
-                value=IF_ELECTRICITY_MIX_GWP,
+                value=find_electricity_mix('WOR')[2],
                 interactive=True
             )
             input_mix_adpe = gr.Number(
                 label="Electricity mix - Abiotic resources [kgSbeq / kWh]",
-                value=IF_ELECTRICITY_MIX_ADPE,
+                value=find_electricity_mix('WOR')[0],
                 interactive=True
             )
             input_mix_pe = gr.Number(
                 label="Electricity mix - Primary energy [MJ / kWh]",
-                value=IF_ELECTRICITY_MIX_PE,
+                value=find_electricity_mix('WOR')[1],
                 interactive=True
             )
 
```
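The new defaults index positionally into `find_electricity_mix('WOR')` (presumably imported with the other helpers from src.utils). Matching indices to the labels they feed, the assumed order is `[0]` = ADPe, `[1]` = PE, `[2]` = GWP. A small sketch that names the positions once instead of repeating magic indices (the NamedTuple wrapper and its field order are assumptions inferred from those labels):

```python
from typing import NamedTuple

class ElectricityMix(NamedTuple):
    # Field order inferred from how the indices are used above.
    adpe: float  # Abiotic resources [kgSbeq / kWh]
    pe: float    # Primary energy [MJ / kWh]
    gwp: float   # GHG emissions [kgCO2eq / kWh]

# Usage sketch:
# mix = ElectricityMix(*find_electricity_mix('WOR'))
# input_mix_gwp = gr.Number(..., value=mix.gwp, interactive=True)
```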
```diff
@@ -423,104 +428,89 @@ with gr.Blocks(css=custom_css) as demo:
                        x_title=None,
                        y_title='electricity mix in gCO2eq / kWh')
 
-    with gr.Tab("🔍 Evaluate your own usage"):
-        …
-            list_text = str(soup).split('parts":["')
-            s = ''
-            for item in list_text[1:int(len(list_text)/2)]:
-                if list_text.index(item)%2 == 1:
-                    s = s + item.split('"]')[0]
-        …
-            provider = model.split('/')[0].lower()
-            model = model.split('/')[1]
-            impacts = compute_llm_impacts(
-                provider=provider,
-                model_name=model,
-                output_token_count=amount_token,
-                request_latency=100000
-            )
-        …
-            <p align="center"><i>Evaluates the electricity consumption<i></p><br>
-            """
-        …
-            pe = f"""
-            <h2 align="center">⛽️ Primary Energy</h2>
-            $$ \Large {impacts.pe.magnitude:.3g} \ \large {impacts.pe.units} $$
-            <p align="center"><i>Evaluates the use of energy resources<i></p><br>
-            """
-        …
+    # with gr.Tab("🔍 Evaluate your own usage"):
+
+    #     with gr.Row():
+    #         gr.Markdown("""
+    #         # 🔍 Evaluate your own usage
+    #         ⚠️ For now, only ChatGPT conversation import is available.
+    #         You can always try out other models - however results might be inaccurate due to fixed parameters, such as tokenization method.
+    #         """)
+
+    #     def compute_own_impacts(amount_token, model):
+    #         provider = model.split('/')[0].lower()
+    #         model = model.split('/')[1]
+    #         impacts = llm_impacts(
+    #             provider=provider,
+    #             model_name=model,
+    #             output_token_count=amount_token,
+    #             request_latency=100000
+    #         )
+
+    #         impacts = format_impacts(impacts)
+
+    #         energy = f"""
+    #         <h2 align="center">⚡️ Energy</h2>
+    #         $$ \Large {impacts.energy.magnitude:.3g} \ \large {impacts.energy.units} $$
+    #         <p align="center"><i>Evaluates the electricity consumption<i></p><br>
+    #         """
+
+    #         gwp = f"""
+    #         <h2 align="center">🌍️ GHG Emissions</h2>
+    #         $$ \Large {impacts.gwp.magnitude:.3g} \ \large {impacts.gwp.units} $$
+    #         <p align="center"><i>Evaluates the effect on global warming<i></p><br>
+    #         """
+
+    #         adp = f"""
+    #         <h2 align="center">🪨 Abiotic Resources</h2>
+    #         $$ \Large {impacts.adpe.magnitude:.3g} \ \large {impacts.adpe.units} $$
+    #         <p align="center"><i>Evaluates the use of metals and minerals<i></p><br>
+    #         """
+
+    #         pe = f"""
+    #         <h2 align="center">⛽️ Primary Energy</h2>
+    #         $$ \Large {impacts.pe.magnitude:.3g} \ \large {impacts.pe.units} $$
+    #         <p align="center"><i>Evaluates the use of energy resources<i></p><br>
+    #         """
+
+    #         return energy, gwp, adp, pe
+
+    #     def combined_function(text, model):
+    #         n_token = process_input(text)
+    #         energy, gwp, adp, pe = compute_own_impacts(n_token, model)
+    #         return n_token, energy, gwp, adp, pe
+
+    #     with gr.Blocks():
+
+    #         text_input = gr.Textbox(label="Paste the URL here (must be on https://chatgpt.com/share/xxxx format)")
+    #         model = gr.Dropdown(
+    #             MODELS,
+    #             label="Model name",
+    #             value="openai/gpt-4o",
+    #             filterable=True,
+    #             interactive=True
+    #         )
+
+    #         process_button = gr.Button("Estimate this usage footprint")
+
+    #         with gr.Accordion("ℹ️ Infos", open=False):
+    #             n_token = gr.Textbox(label="Total amount of tokens :")
+
+    #         with gr.Row():
+    #             with gr.Column(scale=1, min_width=220):
+    #                 energy = gr.Markdown()
+    #             with gr.Column(scale=1, min_width=220):
+    #                 gwp = gr.Markdown()
+    #             with gr.Column(scale=1, min_width=220):
+    #                 adp = gr.Markdown()
+    #             with gr.Column(scale=1, min_width=220):
+    #                 pe = gr.Markdown()
+
+    #         process_button.click(
+    #             fn=combined_function,
+    #             inputs=[text_input, model],
+    #             outputs=[n_token, energy, gwp, adp, pe]
+    #         )
 
     with gr.Tab("📖 Methodology"):
         gr.Markdown(METHODOLOGY_TEXT,
```
requirements-dev.txt
CHANGED

```diff
@@ -1,5 +1,5 @@
 gradio
-ecologits==0.1
+ecologits==0.5.1
 pint
 beautifulsoup4
 requests
```
requirements.txt
CHANGED

```diff
@@ -1,4 +1,4 @@
-ecologits==0.1
+ecologits==0.5.1
 pint
 beautifulsoup4
 requests
```
src/scrapper.py
ADDED

```diff
@@ -0,0 +1,33 @@
+import requests
+from bs4 import BeautifulSoup
+import tiktoken
+
+tokenizer = tiktoken.get_encoding('cl100k_base')
+
+def process_input(text):
+
+    r = requests.get(text, verify=False)
+
+    soup = BeautifulSoup(r.text, "html.parser")
+    print(soup)
+    list_text = str(soup).split('parts":["')
+    #print(list_text)
+    s = ''
+    for item in list_text[1:int(len(list_text)/2)]:
+        if list_text.index(item)%2 == 1:
+            s = s + item.split('"]')[0]
+
+    amount_token = tiktoken_len(s)
+
+    return amount_token
+
+def tiktoken_len(text):
+    tokens = tokenizer.encode(
+        text,
+        disallowed_special=()
+    )
+    return len(tokens)
+
+answer = process_input('https://chatgpt.com/share/6737b9b5-56fc-8002-a212-35339f5b1d5a')
+
+print(answer)
```
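As written, the module fires a live HTTP request at import time: the trailing `answer = process_input(...)` runs whenever app.py does `from src.scrapper import process_input`, and `verify=False` disables TLS certificate checks. A sketch of the same module with the smoke test guarded and verification left on; the chunk-selection heuristic keeps the original's intent but avoids `list.index`, which returns the first match and misfires on duplicate chunks:

```python
import requests
from bs4 import BeautifulSoup
import tiktoken

tokenizer = tiktoken.get_encoding('cl100k_base')

def tiktoken_len(text):
    # Token count under the cl100k_base encoding.
    return len(tokenizer.encode(text, disallowed_special=()))

def process_input(url):
    # Fetch a shared ChatGPT conversation and pull the message "parts"
    # out of the JSON embedded in the page.
    r = requests.get(url, timeout=30)  # TLS verification stays enabled
    soup = BeautifulSoup(r.text, "html.parser")
    chunks = str(soup).split('parts":["')
    # First half of the chunks, odd positions only, as in the original loop.
    half = chunks[1:len(chunks) // 2]
    text = ''.join(c.split('"]')[0] for i, c in enumerate(half, start=1) if i % 2 == 1)
    return tiktoken_len(text)

if __name__ == "__main__":
    # Smoke test runs only when executed directly, not on import.
    print(process_input('https://chatgpt.com/share/6737b9b5-56fc-8002-a212-35339f5b1d5a'))
```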
src/utils.py
CHANGED

```diff
@@ -3,7 +3,8 @@ from enum import Enum
 
 import pandas as pd
 
-from ecologits.impacts.…
+from ecologits.impacts.modeling import Impacts, Energy, GWP, ADPe, PE
+from ecologits.tracers.utils import llm_impacts, _avg
 from pint import UnitRegistry, Quantity
 
 
@@ -116,12 +117,24 @@ def format_pe(pe: PE) -> Quantity:
     return val
 
 def format_impacts(impacts: Impacts) -> QImpacts:
-    return QImpacts(
-        energy=format_energy(impacts.energy),
-        gwp=format_gwp(impacts.gwp),
-        adpe=format_adpe(impacts.adpe),
-        pe=format_pe(impacts.pe)
-    )
+    try:
+        impacts.energy.value = (impacts.energy.value.max + impacts.energy.value.min) / 2
+        impacts.gwp.value = (impacts.gwp.value.max + impacts.gwp.value.min) / 2
+        impacts.adpe.value = (impacts.adpe.value.max + impacts.adpe.value.min) / 2
+        impacts.pe.value = (impacts.pe.value.max + impacts.pe.value.min) / 2
+        return QImpacts(
+            energy=format_energy(impacts.energy),
+            gwp=format_gwp(impacts.gwp),
+            adpe=format_adpe(impacts.adpe),
+            pe=format_pe(impacts.pe)
+        )
+    except:  # when no range
+        return QImpacts(
+            energy=format_energy(impacts.energy),
+            gwp=format_gwp(impacts.gwp),
+            adpe=format_adpe(impacts.adpe),
+            pe=format_pe(impacts.pe)
+        )
 
 def format_impacts_expert(impacts: Impacts) -> QImpacts:
     return QImpacts(
```
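With 0.5.x, an impact value may carry a min/max range rather than a single number; the new `format_impacts` collapses a range to its midpoint and falls back to the plain value when the attribute access fails. A sketch of the same behavior for src/utils.py with an explicit shape check instead of a bare `except`, so genuine errors still surface (`_midpoint` is a hypothetical helper):

```python
def _midpoint(value):
    # Treat anything exposing both .min and .max as a range (an assumption
    # matching the try/except above); pass plain numbers through untouched.
    if hasattr(value, "min") and hasattr(value, "max"):
        return (value.min + value.max) / 2
    return value

def format_impacts(impacts: Impacts) -> QImpacts:
    for field in ("energy", "gwp", "adpe", "pe"):
        impact = getattr(impacts, field)
        impact.value = _midpoint(impact.value)
    return QImpacts(
        energy=format_energy(impacts.energy),
        gwp=format_gwp(impacts.gwp),
        adpe=format_adpe(impacts.adpe),
        pe=format_pe(impacts.pe)
    )
```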