Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -792,116 +792,169 @@ Generate background music using Video2Music by providing an input video.
|
|
| 792 |
"""
|
| 793 |
|
| 794 |
|
| 795 |
-
# input_video = gr.Video(label="Input Video")
|
| 796 |
-
# input_primer = gr.Textbox(label="Input Primer", value="C Am F G")
|
| 797 |
-
# input_key = gr.Dropdown(choices=["C major", "A minor"], value="C major", label="Input Key")
|
| 798 |
-
# output_video = gr.Video(label="Output Video")
|
| 799 |
-
# input_youtube = gr.Textbox(label="YouTube URL")
|
| 800 |
-
|
| 801 |
css = """
|
| 802 |
.gradio-container {
|
| 803 |
font-family: 'Inter', -apple-system, system-ui, sans-serif;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 804 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 805 |
.gr-button {
|
| 806 |
color: white;
|
| 807 |
background: #4CAF50;
|
| 808 |
border-radius: 8px;
|
| 809 |
-
padding:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 810 |
}
|
| 811 |
-
"""
|
| 812 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 813 |
|
| 814 |
-
|
| 815 |
-
|
| 816 |
-
|
| 817 |
-
|
| 818 |
-
|
| 819 |
-
# border-radius: 100vh;
|
| 820 |
-
# }
|
| 821 |
-
# '''
|
| 822 |
-
|
| 823 |
-
|
| 824 |
-
|
| 825 |
-
# Gradio interface
|
| 826 |
-
# gr_interface = gr.Interface(
|
| 827 |
-
# fn=gradio_generate,
|
| 828 |
-
# inputs=[input_video, input_primer, input_key ],
|
| 829 |
-
# outputs=[output_video],
|
| 830 |
-
# description=description_text,
|
| 831 |
-
# allow_flagging='never',
|
| 832 |
-
# cache_examples=True,
|
| 833 |
-
# )
|
| 834 |
-
|
| 835 |
-
# gr_interface2 = gr.Interface(
|
| 836 |
-
# fn=gradio_generate2,
|
| 837 |
-
# inputs=[input_youtube, input_primer, input_key ],
|
| 838 |
-
# outputs=[output_video],
|
| 839 |
-
# description=description_text,
|
| 840 |
-
# allow_flagging='never',
|
| 841 |
-
# cache_examples=True,
|
| 842 |
-
# )
|
| 843 |
-
|
| 844 |
-
def filter(choice):
    """Map the selected input method to visibility updates for two sections.

    NOTE: this name shadows Python's builtin ``filter``; it is kept because
    the ``radio.change(filter, ...)`` wiring refers to it by this name.

    Args:
        choice: Label of the selected radio option.

    Returns:
        A two-item list of ``gr.update`` objects (upload section first,
        YouTube section second), or ``None`` when ``choice`` matches neither
        known label.
    """
    if choice == "Upload Video":
        upload_visible = True
    elif choice == "YouTube URL":
        upload_visible = False
    else:
        # Unknown label: fall through exactly like an unmatched if-chain.
        return None
    return [
        gr.update(visible=upload_visible),
        gr.update(visible=not upload_visible),
    ]
|
| 849 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 850 |
|
| 851 |
-
# with gr.Blocks() as demo:
|
| 852 |
with gr.Blocks(css=css) as demo:
|
| 853 |
-
|
| 854 |
-
|
|
|
|
|
|
|
| 855 |
"""
|
| 856 |
This is the demo for Video2Music: Suitable Music Generation from Videos using an Affective Multimodal Transformer model.
|
| 857 |
[Read our paper](https://arxiv.org/abs/2311.00968).
|
| 858 |
"""
|
| 859 |
-
|
| 860 |
-
|
| 861 |
-
|
| 862 |
-
|
| 863 |
-
|
| 864 |
-
|
| 865 |
-
|
| 866 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 867 |
input_video = gr.Video(label="Input Video", max_length=299)
|
| 868 |
-
|
| 869 |
-
|
| 870 |
-
|
| 871 |
-
|
| 872 |
-
|
| 873 |
-
|
| 874 |
-
|
| 875 |
-
|
| 876 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 877 |
input_video_yt = gr.Textbox(label="YouTube URL")
|
| 878 |
-
|
| 879 |
-
|
| 880 |
-
|
| 881 |
-
|
| 882 |
-
|
| 883 |
-
|
| 884 |
-
|
| 885 |
-
|
| 886 |
-
|
| 887 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 888 |
|
| 889 |
radio.change(filter, radio, [rowA, rowB])
|
| 890 |
-
|
| 891 |
btn.click(
|
| 892 |
-
fn=gradio_generate,
|
| 893 |
-
inputs=[input_video,input_primer,input_key],
|
| 894 |
outputs=[output_video],
|
| 895 |
)
|
| 896 |
-
|
| 897 |
btn_yt.click(
|
| 898 |
-
fn=gradio_generate2,
|
| 899 |
-
inputs=[input_video_yt,input_primer_yt,input_key_yt],
|
| 900 |
outputs=[output_video],
|
| 901 |
)
|
| 902 |
|
| 903 |
-
#demo.queue()
|
| 904 |
-
# demo.launch(debug=True)
|
| 905 |
-
|
| 906 |
demo.queue().launch()
|
| 907 |
|
|
|
|
|
|
|
|
|
| 792 |
"""
|
| 793 |
|
| 794 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 795 |
css = """
|
| 796 |
.gradio-container {
|
| 797 |
font-family: 'Inter', -apple-system, system-ui, sans-serif;
|
| 798 |
+
max-width: 1200px;
|
| 799 |
+
margin: 0 auto;
|
| 800 |
+
padding: 1rem;
|
| 801 |
+
}
|
| 802 |
+
|
| 803 |
+
.container {
|
| 804 |
+
display: flex;
|
| 805 |
+
flex-direction: column;
|
| 806 |
+
gap: 1rem;
|
| 807 |
+
width: 100%;
|
| 808 |
+
}
|
| 809 |
+
|
| 810 |
+
.input-section, .output-section {
|
| 811 |
+
display: flex;
|
| 812 |
+
flex-direction: column;
|
| 813 |
+
gap: 1rem;
|
| 814 |
+
width: 100%;
|
| 815 |
+
}
|
| 816 |
+
|
| 817 |
+
.row {
|
| 818 |
+
display: flex;
|
| 819 |
+
flex-direction: row;
|
| 820 |
+
gap: 1rem;
|
| 821 |
+
width: 100%;
|
| 822 |
+
}
|
| 823 |
+
|
| 824 |
+
@media (max-width: 768px) {
|
| 825 |
+
.row {
|
| 826 |
+
flex-direction: column;
|
| 827 |
+
}
|
| 828 |
}
|
| 829 |
+
|
| 830 |
+
.column {
|
| 831 |
+
flex: 1;
|
| 832 |
+
min-width: 0;
|
| 833 |
+
}
|
| 834 |
+
|
| 835 |
.gr-button {
|
| 836 |
color: white;
|
| 837 |
background: #4CAF50;
|
| 838 |
border-radius: 8px;
|
| 839 |
+
padding: 0.75rem 1.5rem;
|
| 840 |
+
border: none;
|
| 841 |
+
cursor: pointer;
|
| 842 |
+
transition: background 0.3s ease;
|
| 843 |
+
}
|
| 844 |
+
|
| 845 |
+
.gr-button:hover {
|
| 846 |
+
background: #45a049;
|
| 847 |
+
}
|
| 848 |
+
|
| 849 |
+
.gr-form {
|
| 850 |
+
border: 1px solid #e0e0e0;
|
| 851 |
+
border-radius: 8px;
|
| 852 |
+
padding: 1rem;
|
| 853 |
+
background: white;
|
| 854 |
}
|
|
|
|
| 855 |
|
| 856 |
+
.gr-box {
|
| 857 |
+
border-radius: 8px;
|
| 858 |
+
background: white;
|
| 859 |
+
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
|
| 860 |
+
}
|
| 861 |
|
| 862 |
+
/* Safari-specific fixes */
|
| 863 |
+
@supports (-webkit-touch-callout: none) {
|
| 864 |
+
.gradio-container {
|
| 865 |
+
-webkit-text-size-adjust: 100%;
|
| 866 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 867 |
|
| 868 |
+
.row {
|
| 869 |
+
display: -webkit-box;
|
| 870 |
+
display: -webkit-flex;
|
| 871 |
+
display: flex;
|
| 872 |
+
}
|
| 873 |
+
|
| 874 |
+
.column {
|
| 875 |
+
-webkit-box-flex: 1;
|
| 876 |
+
-webkit-flex: 1;
|
| 877 |
+
flex: 1;
|
| 878 |
+
}
|
| 879 |
+
}
|
| 880 |
+
"""
|
| 881 |
|
|
|
|
| 882 |
# Help text shared by both "Input Primer" textboxes (upload and YouTube).
_PRIMER_INFO = "Supported types: dim, sus4, min7(m7), min(m), sus2, aug, dim7, maj6(M6), hdim7, 7, min6(m6), maj7(M7)"


def _toggle_input_rows(choice):
    """Return visibility updates for the upload and YouTube input sections.

    Defined next to the UI and deliberately not named ``filter``: if no
    module-level ``filter`` function exists, that name silently resolves to
    Python's builtin ``filter``, which cannot act as a one-argument handler.

    Args:
        choice: Label of the selected radio option ("Upload Video" or
            "YouTube URL").

    Returns:
        A two-item list of ``gr.update`` objects: the first for the upload
        section, the second for the YouTube section. Exactly one is visible.
    """
    upload_selected = choice == "Upload Video"
    return [
        gr.update(visible=upload_selected),
        gr.update(visible=not upload_selected),
    ]


with gr.Blocks(css=css) as demo:
    with gr.Column(elem_classes="container"):
        # Title section
        gr.HTML(f"<h1 style='text-align: center; margin-bottom: 1rem;'>{title}</h1>")
        gr.Markdown(
            """
            This is the demo for Video2Music: Suitable Music Generation from Videos using an Affective Multimodal Transformer model.
            [Read our paper](https://arxiv.org/abs/2311.00968).
            """
        )

        # Input method selection
        radio = gr.Radio(
            ["Upload Video", "YouTube URL"],
            value="Upload Video",
            label="Choose the input method",
            elem_classes="gr-form",
        )

        # Main content area
        with gr.Row(elem_classes="row") as main_content:
            # Left column - Inputs
            with gr.Column(elem_classes="column input-section"):
                # Upload Video section (shown by default)
                with gr.Column(visible=True, elem_classes="gr-form") as rowA:
                    input_video = gr.Video(label="Input Video", max_length=299)
                    with gr.Row(elem_classes="row"):
                        input_primer = gr.Textbox(
                            label="Input Primer",
                            placeholder="(e.g., C Am F G)",
                            value="",
                            info=_PRIMER_INFO,
                        )
                        input_key = gr.Dropdown(
                            choices=all_key_names,
                            value="C major",
                            label="Input Key",
                        )
                    btn = gr.Button("Generate", elem_classes="gr-button")

                # YouTube URL section (hidden until selected via the radio)
                with gr.Column(visible=False, elem_classes="gr-form") as rowB:
                    input_video_yt = gr.Textbox(label="YouTube URL")
                    with gr.Row(elem_classes="row"):
                        input_primer_yt = gr.Textbox(
                            label="Input Primer",
                            placeholder="(e.g., C Am F G)",
                            value="",
                            info=_PRIMER_INFO,
                        )
                        input_key_yt = gr.Dropdown(
                            choices=all_key_names,
                            value="C major",
                            label="Input Key",
                        )
                    btn_yt = gr.Button("Generate", elem_classes="gr-button")

            # Right column - Output (shared by both input methods)
            with gr.Column(elem_classes="column output-section"):
                output_video = gr.Video(label="Output Video", elem_classes="gr-box")

    # Swap which input section is visible when the input method changes.
    radio.change(_toggle_input_rows, radio, [rowA, rowB])

    # Generate from an uploaded video file.
    btn.click(
        fn=gradio_generate,
        inputs=[input_video, input_primer, input_key],
        outputs=[output_video],
    )

    # Generate from a YouTube URL.
    btn_yt.click(
        fn=gradio_generate2,
        inputs=[input_video_yt, input_primer_yt, input_key_yt],
        outputs=[output_video],
    )

demo.queue().launch()
|
| 958 |
|
| 959 |
+
|
| 960 |
+
|