Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -118,13 +118,13 @@ class ConversationBot:
|
|
| 118 |
audio_filename = os.path.join('audio', str(uuid.uuid4())[0:8] + ".wav")
|
| 119 |
audio_load = whisper.load_audio(file.name)
|
| 120 |
soundfile.write(audio_filename, audio_load, samplerate = 16000)
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
# AI_prompt = "Received. "
|
| 125 |
-
# self.agent.memory.buffer = self.agent.memory.buffer + Human_prompt + 'AI: ' + AI_prompt
|
| 126 |
AI_prompt = "Received. "
|
| 127 |
-
self.agent.memory.buffer = self.agent.memory.buffer + 'AI: ' + AI_prompt
|
|
|
|
|
|
|
| 128 |
print("======>Current memory:\n %s" % self.agent.memory)
|
| 129 |
#state = state + [(f"<audio src=audio_filename controls=controls></audio>*{audio_filename}*", AI_prompt)]
|
| 130 |
state = state + [(f"*{audio_filename}*", AI_prompt)]
|
|
@@ -146,8 +146,11 @@ class ConversationBot:
|
|
| 146 |
img = img.convert('RGB')
|
| 147 |
img.save(image_filename, "PNG")
|
| 148 |
print(f"Resize image form {width}x{height} to {width_new}x{height_new}")
|
|
|
|
|
|
|
|
|
|
| 149 |
AI_prompt = "Received. "
|
| 150 |
-
self.agent.memory.buffer = self.agent.memory.buffer + 'AI: ' + AI_prompt
|
| 151 |
print("======>Current memory:\n %s" % self.agent.memory)
|
| 152 |
state = state + [(f"*{image_filename}*", AI_prompt)]
|
| 153 |
print(f"\nProcessed run_image, Input image: {image_filename}\nCurrent state: {state}\n"
|
|
@@ -159,7 +162,7 @@ class ConversationBot:
|
|
| 159 |
print("Inputs:", state)
|
| 160 |
print("======>Previous memory:\n %s" % self.agent.memory)
|
| 161 |
# inpaint = Inpaint(device="cpu")
|
| 162 |
-
new_image_filename, new_audio_filename = self.
|
| 163 |
AI_prompt = "Here are the predict audio and the mel spectrum." + f"*{new_audio_filename}*" + f"*{new_image_filename}*"
|
| 164 |
self.agent.memory.buffer = self.agent.memory.buffer + 'AI: ' + AI_prompt
|
| 165 |
print("======>Current memory:\n %s" % self.agent.memory)
|
|
|
|
| 118 |
audio_filename = os.path.join('audio', str(uuid.uuid4())[0:8] + ".wav")
|
| 119 |
audio_load = whisper.load_audio(file.name)
|
| 120 |
soundfile.write(audio_filename, audio_load, samplerate = 16000)
|
| 121 |
+
description = self.models['A2T'].inference(audio_filename)
|
| 122 |
+
Human_prompt = "\nHuman: provide an audio named {}. The description is: {}. This information helps you to understand this audio, but you should use tools to finish following tasks, " \
|
| 123 |
+
"rather than directly imagine from my description. If you understand, say \"Received\". \n".format(audio_filename, description)
|
|
|
|
|
|
|
| 124 |
AI_prompt = "Received. "
|
| 125 |
+
self.agent.memory.buffer = self.agent.memory.buffer + Human_prompt + 'AI: ' + AI_prompt
|
| 126 |
+
# AI_prompt = "Received. "
|
| 127 |
+
# self.agent.memory.buffer = self.agent.memory.buffer + 'AI: ' + AI_prompt
|
| 128 |
print("======>Current memory:\n %s" % self.agent.memory)
|
| 129 |
#state = state + [(f"<audio src=audio_filename controls=controls></audio>*{audio_filename}*", AI_prompt)]
|
| 130 |
state = state + [(f"*{audio_filename}*", AI_prompt)]
|
|
|
|
| 146 |
img = img.convert('RGB')
|
| 147 |
img.save(image_filename, "PNG")
|
| 148 |
print(f"Resize image form {width}x{height} to {width_new}x{height_new}")
|
| 149 |
+
description = self.models['ImageCaptioning'].inference(image_filename)
|
| 150 |
+
Human_prompt = "\nHuman: provide an audio named {}. The description is: {}. This information helps you to understand this audio, but you should use tools to finish following tasks, " \
|
| 151 |
+
"rather than directly imagine from my description. If you understand, say \"Received\". \n".format(image_filename, description)
|
| 152 |
AI_prompt = "Received. "
|
| 153 |
+
self.agent.memory.buffer = self.agent.memory.buffer + Human_prompt + 'AI: ' + AI_prompt
|
| 154 |
print("======>Current memory:\n %s" % self.agent.memory)
|
| 155 |
state = state + [(f"*{image_filename}*", AI_prompt)]
|
| 156 |
print(f"\nProcessed run_image, Input image: {image_filename}\nCurrent state: {state}\n"
|
|
|
|
| 162 |
print("Inputs:", state)
|
| 163 |
print("======>Previous memory:\n %s" % self.agent.memory)
|
| 164 |
# inpaint = Inpaint(device="cpu")
|
| 165 |
+
new_image_filename, new_audio_filename = self.models['Inpaint'].predict(audio_filename, image_filename)
|
| 166 |
AI_prompt = "Here are the predict audio and the mel spectrum." + f"*{new_audio_filename}*" + f"*{new_image_filename}*"
|
| 167 |
self.agent.memory.buffer = self.agent.memory.buffer + 'AI: ' + AI_prompt
|
| 168 |
print("======>Current memory:\n %s" % self.agent.memory)
|