"""Send one prompt through ``huggingface_hub.InferenceClient`` and print the reply.

Builds an ``InferenceClient`` for ``MODEL_ID``, requests a text generation
for ``PROMPT``, and prints whatever the call returns.
"""

import os

from huggingface_hub import InferenceClient

# Model identifier handed to InferenceClient.
MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.3"
# Prompt text sent to the model.
PROMPT = "Hello, this is a test."
# Value forwarded as ``max_new_tokens`` to ``text_generation``.
MAX_NEW_TOKENS = 20

# os.getenv returns None when HF_TOKEN is unset, so the client is then built
# with token=None.
# NOTE(review): confirm how the client authenticates in that case.
client = InferenceClient(MODEL_ID, token=os.getenv("HF_TOKEN"))
res = client.text_generation(PROMPT, max_new_tokens=MAX_NEW_TOKENS)
print(res)