File size: 12,595 Bytes
fca6c6f
77e698c
fca6c6f
77e698c
 
fca6c6f
77e698c
fca6c6f
 
 
77e698c
 
fca6c6f
77e698c
 
fca6c6f
 
 
77e698c
 
fca6c6f
 
 
77e698c
 
fca6c6f
 
77e698c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fca6c6f
 
 
77e698c
 
 
 
fca6c6f
77e698c
 
 
 
 
 
 
fca6c6f
77e698c
fca6c6f
 
 
77e698c
 
 
 
 
 
fca6c6f
0822954
fca6c6f
77e698c
 
 
 
 
 
 
 
 
 
 
 
 
 
fca6c6f
77e698c
 
 
 
fca6c6f
77e698c
 
fca6c6f
77e698c
 
fca6c6f
77e698c
fca6c6f
 
 
77e698c
 
 
 
 
 
 
fca6c6f
 
 
 
 
 
77e698c
fca6c6f
77e698c
 
 
 
 
 
 
fca6c6f
 
 
77e698c
fca6c6f
77e698c
 
 
 
 
 
fca6c6f
77e698c
 
 
 
fca6c6f
77e698c
 
 
 
 
 
fca6c6f
 
 
77e698c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fca6c6f
77e698c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fca6c6f
 
 
 
 
 
77e698c
fca6c6f
77e698c
 
 
 
 
 
fca6c6f
 
 
 
 
 
 
 
77e698c
 
 
fca6c6f
77e698c
fca6c6f
77e698c
fca6c6f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
<!DOCTYPE html>
<html lang="en" >
<head>
  <meta charset="UTF-8" />
  <!-- Document metadata: viewport, title, search description, favicon and social-preview (Open Graph) tags -->
  <meta name="viewport" content="width=device-width, initial-scale=1" />
  <title>Zero-Shot Unsupervised and Text-Based Audio Editing Using DDPM Inversion</title>
  <meta name="description" content="Two zero-shot techniques for editing audio signals using DDPM inversion on pre-trained diffusion models: text-based editing and unsupervised discovery of semantically meaningful editing directions." />
  <link rel="icon" href="https://github.com/HilaManor/AudioEditingCode/raw/refs/heads/codeclean/docs/resources/favicon.ico" />
  <meta property="og:type" content="website" />
  <meta property="og:image" content="https://github.com/HilaManor/AudioEditingCode/raw/refs/heads/codeclean/docs/resources/teaser.png"/>
  <meta property="og:title" content="Zero-Shot Unsupervised and Text-Based Audio Editing Using DDPM Inversion" />
  <meta property="og:description" content="Zero-Shot Unsupervised and Text-Based Audio Editing Using DDPM Inversion" />
  
  <!-- Tailwind CSS CDN (generates the utility classes used throughout the markup at runtime) -->
  <script src="https://cdn.tailwindcss.com"></script>
  <!-- Google Fonts: open the connections early, then load the Poppins stylesheet -->
  <link rel="preconnect" href="https://fonts.googleapis.com" />
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
  <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@400;600;700&display=swap" rel="stylesheet" />
  <!-- Font Awesome (provides the "fas"/"fab" icon classes) -->
  <script src="https://kit.fontawesome.com/ad96f96272.js" crossorigin="anonymous"></script>
  <!-- Academicons (provides the "ai" icon classes, e.g. the arXiv logo) -->
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css" />
  
  <style>
    /* Page-wide base: Poppins typeface over a dark slate gradient, light indigo text */
    body {
      font-family: 'Poppins', sans-serif;
      background: linear-gradient(135deg, #0f172a 0%, #1e293b 100%);
      color: #e0e7ff;
      min-height: 100vh;
    }
    /* Glassmorphism container: translucent panel that blurs whatever is behind it */
    .glass {
      background: rgba(30, 41, 59, 0.75);
      /* Prefixed form first: older Safari releases only recognise -webkit-backdrop-filter */
      -webkit-backdrop-filter: blur(12px);
      backdrop-filter: blur(12px);
      border-radius: 1rem;
      border: 1px solid rgba(255, 255, 255, 0.1);
      box-shadow: 0 8px 32px 0 rgba(31, 38, 135, 0.37);
    }
    /* Gradient text: the background gradient is clipped to the glyphs and the fill made transparent */
    .gradient-text {
      background: linear-gradient(90deg, #3b82f6, #8b5cf6);
      -webkit-background-clip: text;
      /* Standard property alongside the prefixed one */
      background-clip: text;
      -webkit-text-fill-color: transparent;
    }
    /* Scrollbar for tables (WebKit/Blink pseudo-elements) */
    .scrollbar-thin::-webkit-scrollbar {
      height: 6px;
    }
    .scrollbar-thin::-webkit-scrollbar-thumb {
      background-color: #6366f1;
      border-radius: 10px;
    }
    /* Back to top button: gradient fill that reverses direction on hover */
    #btn-back-to-top {
      background: linear-gradient(90deg, #3b82f6, #8b5cf6);
      box-shadow: 0 4px 15px rgba(59, 130, 246, 0.5);
    }
    #btn-back-to-top:hover {
      background: linear-gradient(90deg, #8b5cf6, #3b82f6);
    }
  </style>
</head>
<body class="relative">

  <!-- Navigation: sticky top bar with inline links from the md breakpoint up and a toggleable menu below it -->
  <!-- NOTE(review): only #supsamples has a matching id in the markup visible here; confirm the
       #unsupsamples, #supcomparisons and #unsupcomparisons targets exist elsewhere in the page. -->
  <nav class="glass sticky top-0 z-50 shadow-lg">
    <div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
      <div class="flex justify-between items-center h-16">
        <a href="#" class="text-2xl font-extrabold gradient-text tracking-tight">Zero-Shot Audio Editing</a>
        <div class="hidden md:flex space-x-8 text-gray-300 font-semibold">
          <a href="#supsamples" class="hover:text-indigo-400 transition">Text-based Editing</a>
          <a href="#unsupsamples" class="hover:text-indigo-400 transition">Unsup. Editing</a>
          <a href="#supcomparisons" class="hover:text-indigo-400 transition">Text-Based Editing Comp.</a>
          <a href="#unsupcomparisons" class="hover:text-indigo-400 transition">Unsup. Editing Comp.</a>
        </div>
        <!-- Icon-only control: needs an explicit accessible name, and a visible focus ring because the default outline is removed -->
        <button id="mobile-menu-btn" type="button" class="md:hidden text-gray-300 hover:text-indigo-400 focus:outline-none focus-visible:ring-2 focus-visible:ring-indigo-400 rounded text-2xl" aria-label="Toggle navigation menu" aria-controls="mobile-menu">
          <i class="fas fa-bars" aria-hidden="true"></i>
        </button>
      </div>
      <!-- Mobile menu: starts hidden; the page script toggles the "hidden" class on this element -->
      <div id="mobile-menu" class="hidden flex-col space-y-3 pb-4 text-gray-300 font-semibold md:hidden">
        <a href="#supsamples" class="block hover:text-indigo-400 transition">Text-based Editing</a>
        <a href="#unsupsamples" class="block hover:text-indigo-400 transition">Unsup. Editing</a>
        <a href="#supcomparisons" class="block hover:text-indigo-400 transition">Text-Based Editing Comp.</a>
        <a href="#unsupcomparisons" class="block hover:text-indigo-400 transition">Unsup. Editing Comp.</a>
      </div>
    </div>
  </nav>

  <!-- Main Content -->
  <main class="max-w-5xl mx-auto px-4 sm:px-6 lg:px-8 py-10 space-y-12">

    <!-- Header: paper title, venue, authors, affiliation and external resource links -->
    <header class="text-center space-y-4">
      <h1 class="text-4xl md:text-5xl font-extrabold gradient-text leading-tight">
        Zero-Shot Unsupervised and Text-Based Audio Editing Using DDPM Inversion
      </h1>
      <!-- Venue is a paragraph, not a heading, so the outline does not jump from h1 to h4 -->
      <p class="text-xl font-semibold">ICML 2024</p>
      <div class="flex flex-wrap justify-center gap-6 text-lg text-indigo-300">
        <a href="https://www.linkedin.com/in/hilamanor/" target="_blank" rel="noopener noreferrer" class="hover:text-indigo-400">Hila Manor</a>
        <!-- Purely visual separator; hidden from assistive technology -->
        <span aria-hidden="true">|</span>
        <a href="https://tomer.net.technion.ac.il/" target="_blank" rel="noopener noreferrer" class="hover:text-indigo-400">Tomer Michaeli</a>
      </div>
      <p class="text-indigo-200">Technion - Israel Institute of Technology</p>
      <!-- Resource buttons: every link opens in a new tab, so each carries rel="noopener noreferrer";
           the icons are decorative because the link text already names the destination -->
      <div class="flex flex-wrap justify-center gap-4 mt-4">
        <a href="https://arxiv.org/abs/2402.10009" target="_blank" rel="noopener noreferrer" class="inline-flex items-center gap-2 px-5 py-2 rounded-lg bg-indigo-600 hover:bg-indigo-700 transition shadow">
          <i class="ai ai-arxiv text-xl" aria-hidden="true"></i> ArXiv
        </a>
        <a href="https://github.com/HilaManor/AudioEditingCode/" target="_blank" rel="noopener noreferrer" class="inline-flex items-center gap-2 px-5 py-2 rounded-lg bg-indigo-600 hover:bg-indigo-700 transition shadow">
          <i class="fab fa-github text-xl" aria-hidden="true"></i> Code
        </a>
        <a href="https://youtu.be/lBnldOQVbS4" target="_blank" rel="noopener noreferrer" class="inline-flex items-center gap-2 px-5 py-2 rounded-lg bg-indigo-600 hover:bg-indigo-700 transition shadow">
          <i class="fab fa-youtube text-xl" aria-hidden="true"></i> Presentation
        </a>
        <a href="https://huggingface.co/spaces/hilamanor/audioEditing/" target="_blank" rel="noopener noreferrer" class="inline-flex items-center gap-2 px-5 py-2 rounded-lg bg-indigo-600 hover:bg-indigo-700 transition shadow">
          🤗 Text-Based Space
        </a>
      </div>
    </header>

    <!-- Abstract + Teaser: teaser figure followed by the paper abstract inside a glass panel -->
    <section class="glass p-6 rounded-xl shadow-lg">
      <!-- NOTE(review): intrinsic width/height are not known from this file; adding them would reserve layout space while the image loads -->
      <img src="https://github.com/HilaManor/AudioEditingCode/raw/refs/heads/codeclean/docs/resources/teaser.png" alt="Teaser" decoding="async" class="mx-auto rounded-lg shadow-lg max-w-full h-auto mb-6" />
      <h2 class="text-2xl font-semibold mb-4 text-center">Abstract</h2>
      <p class="leading-relaxed text-indigo-100 max-w-3xl mx-auto text-center">
        Editing signals using large pre-trained models, in a zero-shot manner, has recently seen rapid advancements in the image domain. However, this wave has yet to reach the audio domain.
        In this paper, we explore two zero-shot editing techniques for audio signals, which use DDPM inversion on pre-trained diffusion models. The first, adopted from the image domain, allows text-based editing.
        The second is a novel approach for discovering semantically meaningful editing directions without supervision.
        When applied to music signals, this method exposes a range of musically interesting modifications, from controlling the participation of specific instruments to improvisations on the melody.
      </p>
    </section>

    <!-- Video Overview: short explainer video with image credits -->
    <section class="text-center space-y-4">
      <h2 class="text-2xl font-semibold">Video Overview</h2>
      <p class="text-indigo-300 max-w-xl mx-auto">
        For people in a hurry. Images generated by <a href="https://openai.com/dall-e-2" target="_blank" rel="noopener noreferrer" class="text-indigo-400 hover:underline">DALL-E 2</a> and <a href="https://www.bing.com/copilot" target="_blank" rel="noopener noreferrer" class="text-indigo-400 hover:underline">Copilot</a>.
      </p>
      <!-- preload="metadata" fetches only metadata (e.g. duration) up front rather than the whole file.
           NOTE(review): no captions <track> is provided; add one if a caption file exists. -->
      <video controls class="mx-auto rounded-lg shadow-lg max-w-full w-full sm:w-3/4 md:w-2/3" preload="metadata">
        <source src="https://github.com/HilaManor/AudioEditingCode/raw/refs/heads/codeclean/docs/resources/overview.mp4" type="video/mp4" />
        Your browser does not support the video tag.
      </video>
    </section>

    <!-- Samples of Editing: introduction plus the text-based editing sample table -->
    <section>
      <h2 class="text-3xl font-bold mb-6 text-center">1. Samples of Editing</h2>
      <p class="text-center text-indigo-300 mb-8 max-w-3xl mx-auto">
        We present samples of audio editing using our proposed methods. The samples are organized into two sections: text-based editing and unsupervised editing.
      </p>

      <!-- Text-Based Editing Table -->
      <!-- scroll-mt-20 keeps this anchor target clear of the sticky navigation bar when jumped to -->
      <h3 id="supsamples" class="text-2xl font-semibold mb-4 scroll-mt-20">1.1. Samples of Text-Based Editing</h3>
      <!-- Wrapper scrolls horizontally so the wide table does not overflow narrow screens -->
      <div class="overflow-x-auto rounded-lg shadow-lg">
        <table class="min-w-full text-left text-indigo-100 border border-indigo-700 rounded-lg table-auto">
          <caption class="sr-only">Text-based editing samples: source and target prompts, original and edited audio, and the edit T-start value</caption>
          <thead class="bg-indigo-900/90">
            <tr>
              <th scope="col" class="px-3 py-2 border border-indigo-700">#</th>
              <th scope="col" class="px-3 py-2 border border-indigo-700">Source Prompt</th>
              <th scope="col" class="px-3 py-2 border border-indigo-700">Target Prompt</th>
              <th scope="col" class="px-3 py-2 border border-indigo-700">Original Audio</th>
              <th scope="col" class="px-3 py-2 border border-indigo-700">Edited Audio</th>
              <th scope="col" class="px-3 py-2 border border-indigo-700">Edit T<sub>start</sub></th>
            </tr>
          </thead>
          <tbody>
            <!-- The audio files are MP3, so each source is declared as audio/mpeg (not audio/mp4) -->
            <tr class="hover:bg-indigo-700/30 transition">
              <td class="border border-indigo-700 px-3 py-2">1</td>
              <td class="border border-indigo-700 px-3 py-2">A recording of a <b>sneaky jazz</b> song.</td>
              <td class="border border-indigo-700 px-3 py-2">A recording of a <b>tense classical</b> music score.</td>
              <td class="border border-indigo-700 px-3 py-2">
                <audio controls preload="metadata" class="w-40 rounded" aria-label="Original audio, sample 1">
                  <source src="https://github.com/HilaManor/AudioEditingCode/raw/refs/heads/codeclean/docs/resources/audio/orig/MDDBFreeJazz.mp3" type="audio/mpeg" />
                  Your browser does not support the audio element.
                </audio>
              </td>
              <td class="border border-indigo-700 px-3 py-2">
                <audio controls preload="metadata" class="w-40 rounded" aria-label="Edited audio, sample 1">
                  <source src="https://github.com/HilaManor/AudioEditingCode/raw/refs/heads/codeclean/docs/resources/audio/sup_samples/orchestra_MDDBFreeJazz_ours_90.mp3" type="audio/mpeg" />
                  Your browser does not support the audio element.
                </audio>
              </td>
              <!-- NOTE(review): the edited file name ends in "_90" while this cell says 110 — confirm which value is correct -->
              <td class="border border-indigo-700 px-3 py-2 text-center">110</td>
            </tr>
            <tr class="hover:bg-indigo-700/30 transition">
              <td class="border border-indigo-700 px-3 py-2">2</td>
              <td class="border border-indigo-700 px-3 py-2">A recording of a <b>hard rock</b> song.</td>
              <td class="border border-indigo-700 px-3 py-2">A recording of a <b>jazz</b> song.</td>
              <td class="border border-indigo-700 px-3 py-2">
                <audio controls preload="metadata" class="w-40 rounded" aria-label="Original audio, sample 2">
                  <source src="https://github.com/HilaManor/AudioEditingCode/raw/refs/heads/codeclean/docs/resources/audio/orig/MDDBZeppelin.mp3" type="audio/mpeg" />
                  Your browser does not support the audio element.
                </audio>
              </td>
              <td class="border border-indigo-700 px-3 py-2">
                <audio controls preload="metadata" class="w-40 rounded" aria-label="Edited audio, sample 2">
                  <source src="https://github.com/HilaManor/AudioEditingCode/raw/refs/heads/codeclean/docs/resources/audio/sup_samples/jazz_MDDBZeppelin_ours_100.mp3" type="audio/mpeg" />
                  Your browser does not support the audio element.
                </audio>
              </td>
              <td class="border border-indigo-700 px-3 py-2 text-center">100</td>
            </tr>
            <!-- Add more rows as needed -->
          </tbody>
        </table>
      </div>
    </section>

  </main>

  <!-- Back to Top Button: fixed to the bottom-right corner; starts hidden and the page script toggles its "hidden" class on scroll -->
  <button id="btn-back-to-top" type="button" class="fixed bottom-8 right-8 p-4 rounded-full text-white shadow-lg hidden z-50" aria-label="Back to top">
    <!-- Decorative icon; the button's accessible name comes from aria-label -->
    <i class="fas fa-arrow-up" aria-hidden="true"></i>
  </button>

  <script>
    // Mobile menu toggle: clicking the button shows/hides #mobile-menu by flipping its
    // "hidden" class, and mirrors the resulting state in aria-expanded on the button.
    const menuBtn = document.getElementById('mobile-menu-btn');
    const menu = document.getElementById('mobile-menu');
    // Publish the initial state so assistive technology knows whether the menu is open.
    menuBtn.setAttribute('aria-expanded', String(!menu.classList.contains('hidden')));
    menuBtn.addEventListener('click', () => {
      // classList.toggle returns true when the class is present after the call,
      // i.e. when the menu has just been hidden.
      const nowHidden = menu.classList.toggle('hidden');
      menuBtn.setAttribute('aria-expanded', String(!nowHidden));
    });

    // Back-to-top button visibility: shown once the page is scrolled more than 300px, hidden otherwise
    const backToTopBtn = document.getElementById('btn-back-to-top');
    window.addEventListener('scroll', () => {
      const pastThreshold = window.scrollY > 300;
      // The second argument forces the class on (true) or off (false)
      backToTopBtn.classList.toggle('hidden', !pastThreshold);
    });