Commit
·
817cd1e
1
Parent(s):
16293fe
Add a function to watch current VRAM usage
Browse files- app.py +15 -0
- gpu_info.py +67 -0
app.py
CHANGED
|
@@ -6,6 +6,8 @@ from typing import List, Tuple
|
|
| 6 |
import gradio as gr
|
| 7 |
import spaces
|
| 8 |
|
|
|
|
|
|
|
| 9 |
PWD = os.path.dirname(__file__)
|
| 10 |
CHECKPOINTS_PATH = "/data/checkpoints"
|
| 11 |
# CHECKPOINTS_PATH = os.path.join(PWD, "checkpoints")
|
|
@@ -285,7 +287,10 @@ def generate_video(
|
|
| 285 |
|
| 286 |
log.info(f"actual_seed: {actual_seed}")
|
| 287 |
|
|
|
|
| 288 |
start_time = time.time()
|
|
|
|
|
|
|
| 289 |
args, control_inputs = parse_arguments(
|
| 290 |
controlnet_specs_in={
|
| 291 |
"hdmap": {"control_weight": 0.3, "input_control": hdmap_video_input},
|
|
@@ -301,10 +306,20 @@ def generate_video(
|
|
| 301 |
num_gpus=1,
|
| 302 |
seed=seed,
|
| 303 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
videos, prompts = inference(args, control_inputs)
|
|
|
|
|
|
|
| 305 |
end_time = time.time()
|
| 306 |
log.info(f"Time taken: {end_time - start_time} s")
|
| 307 |
|
|
|
|
|
|
|
|
|
|
| 308 |
video = videos[0]
|
| 309 |
return video, video, actual_seed
|
| 310 |
|
|
|
|
| 6 |
import gradio as gr
|
| 7 |
import spaces
|
| 8 |
|
| 9 |
+
from gpu_info import watch_gpu_memory
|
| 10 |
+
|
| 11 |
PWD = os.path.dirname(__file__)
|
| 12 |
CHECKPOINTS_PATH = "/data/checkpoints"
|
| 13 |
# CHECKPOINTS_PATH = os.path.join(PWD, "checkpoints")
|
|
|
|
| 287 |
|
| 288 |
log.info(f"actual_seed: {actual_seed}")
|
| 289 |
|
| 290 |
+
# add timer to calculate the generation time
|
| 291 |
start_time = time.time()
|
| 292 |
+
|
| 293 |
+
# parse generation configs
|
| 294 |
args, control_inputs = parse_arguments(
|
| 295 |
controlnet_specs_in={
|
| 296 |
"hdmap": {"control_weight": 0.3, "input_control": hdmap_video_input},
|
|
|
|
| 306 |
num_gpus=1,
|
| 307 |
seed=seed,
|
| 308 |
)
|
| 309 |
+
|
| 310 |
+
# watch gpu memory
|
| 311 |
+
watcher = watch_gpu_memory(10)
|
| 312 |
+
|
| 313 |
+
# start inference
|
| 314 |
videos, prompts = inference(args, control_inputs)
|
| 315 |
+
|
| 316 |
+
# print the generation time
|
| 317 |
end_time = time.time()
|
| 318 |
log.info(f"Time taken: {end_time - start_time} s")
|
| 319 |
|
| 320 |
+
# stop the watcher
|
| 321 |
+
watcher.cancel()
|
| 322 |
+
|
| 323 |
video = videos[0]
|
| 324 |
return video, video, actual_seed
|
| 325 |
|
gpu_info.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from subprocess import check_output
|
| 2 |
+
from threading import Timer
|
| 3 |
+
from typing import Callable, List
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def get_gpu_memory() -> List[int]:
    """
    Get the free GPU memory (VRAM) in MiB for every visible GPU.

    Shells out to ``nvidia-smi`` once and parses its ``csv,noheader,nounits``
    output, which is one integer per line.

    :return: List of free VRAM values in MiB, one entry per GPU.
    :raises FileNotFoundError: if ``nvidia-smi`` is not on PATH.
    :raises subprocess.CalledProcessError: if ``nvidia-smi`` exits non-zero.
    """
    command = "nvidia-smi --query-gpu=memory.free --format=csv,noheader,nounits"
    output = check_output(command.split()).decode("ascii")
    # splitlines() handles both \n and \r\n and does not depend on a trailing
    # newline — split("\n")[:-1] silently dropped the last GPU's value when
    # the output was not newline-terminated. Blank lines are skipped.
    return [int(line) for line in output.splitlines() if line.strip()]
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class RepeatingTimer(Timer):
    """A ``threading.Timer`` that fires every ``interval`` seconds until cancelled.

    ``Timer`` normally runs its callback once; this subclass overrides
    :meth:`run` to keep firing until :meth:`cancel` sets the ``finished`` event.
    """

    def run(self):
        # Event.wait(timeout) returns True as soon as cancel() sets the
        # `finished` flag, so each loop iteration is: sleep one interval,
        # then fire the callback unless we were cancelled in the meantime.
        while not self.finished.wait(self.interval):
            self.function(*self.args, **self.kwargs)
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# Module-level handle to the single active watcher (None when none is running).
gpu_memory_watcher: "RepeatingTimer | None" = None


def watch_gpu_memory(interval: int = 1, callback: Callable[[List[int]], None] = None) -> RepeatingTimer:
    """
    Start a repeating timer that samples free GPU memory every ``interval`` seconds.

    :param interval: Sampling interval in seconds.
    :param callback: Invoked with the list of free-VRAM values (MiB) on each
        tick; defaults to ``print``.
    :return: The started :class:`RepeatingTimer`; call ``.cancel()`` to stop it.
    :raises RuntimeError: if a watcher is already running.
    """
    global gpu_memory_watcher
    # Only a *live* watcher blocks a new one. After .cancel() the Timer's
    # `finished` event is set, so the module can be re-armed; the previous
    # bare `is not None` check made every call after the first watcher raise
    # RuntimeError forever, since the global was never reset.
    if gpu_memory_watcher is not None and not gpu_memory_watcher.finished.is_set():
        raise RuntimeError("GPU memory watcher is already running")

    if callback is None:
        callback = print

    gpu_memory_watcher = RepeatingTimer(interval, lambda: callback(get_gpu_memory()))
    gpu_memory_watcher.start()

    return gpu_memory_watcher
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
if __name__ == "__main__":
    # Smoke-test: print free VRAM once per second for 20 seconds, and verify
    # that starting a second watcher while one is running raises RuntimeError.
    from time import sleep

    watcher = watch_gpu_memory()

    tick = 0
    while tick < 20:
        sleep(1)
        tick += 1
        if tick == 10:
            # A second watcher must be rejected while the first is active.
            try:
                watch_gpu_memory()
            except RuntimeError:
                print("Got exception")
    gpu_memory_watcher.cancel()