Maga222006 committed
Commit a574ec0 · 1 Parent(s): dad059c

MultiagentPersonalAssistant
.idea/MultiagentPersonalAssistant.iml CHANGED
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="test" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="MultiagentPersonalAssistant" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyDocumentationSettings">
.idea/misc.xml CHANGED
@@ -3,5 +3,5 @@
   <component name="Black">
     <option name="sdkName" value="MultiagentPersonalAssistant" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="test" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="MultiagentPersonalAssistant" project-jdk-type="Python SDK" />
 </project>
agent/__pycache__/file_preprocessing.cpython-312.pyc CHANGED
Binary files a/agent/__pycache__/file_preprocessing.cpython-312.pyc and b/agent/__pycache__/file_preprocessing.cpython-312.pyc differ
 
agent/__pycache__/models.cpython-312.pyc CHANGED
Binary files a/agent/__pycache__/models.cpython-312.pyc and b/agent/__pycache__/models.cpython-312.pyc differ
 
agent/file_preprocessing.py CHANGED
@@ -1,19 +1,12 @@
-from speechbrain.inference.classifiers import EncoderClassifier
 from langchain_core.messages import HumanMessage
-import speech_recognition as sr
-from pydub import AudioSegment
 from dotenv import load_dotenv
 from PyPDF2 import PdfReader
 from docx import Document
-import torchaudio
 import mimetypes
 import asyncio
 import base64
-import os
 
 load_dotenv()
-language_id = EncoderClassifier.from_hparams(source="speechbrain/lang-id-voxlingua107-ecapa", savedir="tmp")
-
 
 async def preprocess_file(file_name: str):
     mime_type = mimetypes.guess_type(file_name)[0]
@@ -22,49 +15,18 @@ async def preprocess_file(file_name: str):
     elif "video" in mime_type:
         prompt = "Give a detailed description of the video."
     elif "audio" in mime_type:
-        return await asyncio.to_thread(preprocess_audio, file_name)
+        return await preprocess_audio(file_name)
     else:
         return await asyncio.to_thread(preprocess_text, file_name, mime_type)
 
 
-def preprocess_audio(file_name: str):
-    if not os.path.exists(file_name):
-        raise FileNotFoundError(f"File not found: {file_name}")
-
-    wav_file = os.path.splitext(file_name)[0] + ".wav"
-    audio = AudioSegment.from_file(file_name)
-    audio.export(wav_file, format="wav")
-    signal = language_id.load_audio(wav_file)
-    out = language_id.classify_batch(signal)[0].tolist()[0]
-    lang_mapping = {
-        20: "en",
-        106: "zh",
-        35: "hi",
-        22: "es",
-        3: "ar",
-        28: "fr",
-        77: "ru",
-        75: "pt",
-        9: "bn",
-        45: "ja",
-        18: "de",
-        51: "ko",
-        102: "vi",
-        99: "uk"
-    }
-    scores = [out[index] for index in lang_mapping.keys()]
-    language = list(lang_mapping.values())[scores.index(max(scores))]
-    recognizer = sr.Recognizer()
-    with sr.AudioFile(wav_file) as source:
-        audio_data = recognizer.record(source)
-    try:
-        text = recognizer.recognize_google(audio_data, language=language)
-    except sr.UnknownValueError:
-        text = "[Unintelligible audio]"
-    except sr.RequestError as e:
-        text = f"[API error: {e}]"
-    os.remove(wav_file)
-    return text
+async def preprocess_audio(file_name):
+    from agent.models import groq_client
+    transcription = await groq_client.audio.transcriptions.create(
+        model="whisper-large-v3-turbo",
+        file=open(file_name, "rb")
+    )
+    return transcription.text
 
 
 async def preprocess_image(file_name: str):
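
The rewritten `preprocess_audio` replaces the local SpeechBrain language-ID model plus Google speech recognition with a single call to Groq's hosted `whisper-large-v3-turbo`, which handles language detection itself. Below is a minimal standalone sketch of the same call, assuming a valid `GROQ_API_KEY` in the environment and a hypothetical `sample.wav` test file; unlike the committed version, which passes `open(file_name, "rb")` inline and never closes it, this sketch uses a context manager so the handle is released after upload:

```python
import asyncio
import os

from openai import AsyncOpenAI

# Groq exposes an OpenAI-compatible API, so the stock openai SDK works
# against it; assumes GROQ_API_KEY is set in the environment.
groq_client = AsyncOpenAI(
    base_url="https://api.groq.com/openai/v1",
    api_key=os.getenv("GROQ_API_KEY"),
)


async def transcribe(file_name: str) -> str:
    # Context manager closes the file handle once the upload completes.
    with open(file_name, "rb") as audio_file:
        transcription = await groq_client.audio.transcriptions.create(
            model="whisper-large-v3-turbo",
            file=audio_file,
        )
    return transcription.text


if __name__ == "__main__":
    print(asyncio.run(transcribe("sample.wav")))
```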
agent/models.py CHANGED
@@ -1,6 +1,13 @@
 from langchain.chat_models import init_chat_model
 from dotenv import load_dotenv
+from openai import AsyncOpenAI
+import os
+
 load_dotenv()
+groq_client = AsyncOpenAI(
+    base_url="https://api.groq.com/openai/v1",
+    api_key=os.getenv("GROQ_API_KEY"),
+)
 
 llm_supervisor = init_chat_model(
     model="groq:openai/gpt-oss-120b",
@@ -17,4 +24,4 @@ llm_agents = init_chat_model(
 
 llm_image = init_chat_model(
     model="groq:meta-llama/llama-4-scout-17b-16e-instruct"
-)
+)
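
`agent/models.py` now exposes a module-level `AsyncOpenAI` client pointed at Groq's OpenAI-compatible endpoint, so modules like `agent/file_preprocessing.py` can make raw API calls (such as audio transcription) that the `init_chat_model` wrappers don't cover. A quick smoke test, assuming `GROQ_API_KEY` is set and that Groq serves the standard `/models` listing endpoint (part of the OpenAI-compatible surface):

```python
import asyncio

from agent.models import groq_client


async def smoke_test() -> None:
    # Listing models is a cheap way to confirm the base_url and
    # API key are wired up before making real transcription calls.
    models = await groq_client.models.list()
    print([m.id for m in models.data][:5])


asyncio.run(smoke_test())
```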
requirements.txt CHANGED
@@ -7,14 +7,13 @@ openai>=1.99.1
 transformers>=4.53.2
 timm>=1.0.19
 langchain-groq>=0.3.7
-torchaudio>=2.8.0
-torch>=2.8.0
 langgraph-supervisor>=0.0.29
 wikipedia>=1.4.0
 wolframalpha>=5.1.3
 python-multipart
 soundfile>=0.13.1
 accelerate
+jsonpointer
 langmem>=0.0.29
 greenlet>=3.2.3
 deepagents>=0.0.3
@@ -27,7 +26,6 @@ pillow>=11.3.0
 langchain-huggingface>=0.3.0
 sentence-transformers>=5.0.0
 pyowm>=3.3.0
-speechbrain>=1.0.3
 langchain-core
 geopy>=2.4.1
 langchain-community>=0.3.27
tmp/classifier.ckpt DELETED
@@ -1 +0,0 @@
-/Users/magomedpatahov/.cache/huggingface/hub/models--speechbrain--lang-id-voxlingua107-ecapa/snapshots/0253049ae131d6a4be1c4f0d8b0ff483a0f8c8e9/classifier.ckpt

tmp/embedding_model.ckpt DELETED
@@ -1 +0,0 @@
-/Users/magomedpatahov/.cache/huggingface/hub/models--speechbrain--lang-id-voxlingua107-ecapa/snapshots/0253049ae131d6a4be1c4f0d8b0ff483a0f8c8e9/embedding_model.ckpt

tmp/hyperparams.yaml DELETED
@@ -1 +0,0 @@
-/Users/magomedpatahov/.cache/huggingface/hub/models--speechbrain--lang-id-voxlingua107-ecapa/snapshots/0253049ae131d6a4be1c4f0d8b0ff483a0f8c8e9/hyperparams.yaml

tmp/label_encoder.ckpt DELETED
@@ -1 +0,0 @@
-/Users/magomedpatahov/.cache/huggingface/hub/models--speechbrain--lang-id-voxlingua107-ecapa/snapshots/0253049ae131d6a4be1c4f0d8b0ff483a0f8c8e9/label_encoder.txt