Spaces:
Running
Running
File size: 12,595 Bytes
fca6c6f 77e698c fca6c6f 77e698c fca6c6f 77e698c fca6c6f 77e698c fca6c6f 77e698c fca6c6f 77e698c fca6c6f 77e698c fca6c6f 77e698c fca6c6f 77e698c fca6c6f 77e698c fca6c6f 77e698c fca6c6f 77e698c fca6c6f 0822954 fca6c6f 77e698c fca6c6f 77e698c fca6c6f 77e698c fca6c6f 77e698c fca6c6f 77e698c fca6c6f 77e698c fca6c6f 77e698c fca6c6f 77e698c fca6c6f 77e698c fca6c6f 77e698c fca6c6f 77e698c fca6c6f 77e698c fca6c6f 77e698c fca6c6f 77e698c fca6c6f 77e698c fca6c6f 77e698c fca6c6f 77e698c fca6c6f 77e698c fca6c6f 77e698c fca6c6f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 |
<!DOCTYPE html>
<html lang="en" >
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Zero-Shot Unsupervised and Text-Based Audio Editing Using DDPM Inversion</title>
<!-- Plain description for search engines; same text as the Open Graph description below -->
<meta name="description" content="Zero-Shot Unsupervised and Text-Based Audio Editing Using DDPM Inversion" />
<link rel="shortcut icon" href="https://github.com/HilaManor/AudioEditingCode/raw/refs/heads/codeclean/docs/resources/favicon.ico" />
<!-- Open Graph metadata used for link previews -->
<meta property="og:image" content="https://github.com/HilaManor/AudioEditingCode/raw/refs/heads/codeclean/docs/resources/teaser.png"/>
<meta property="og:title" content="Zero-Shot Unsupervised and Text-Based Audio Editing Using DDPM Inversion" />
<meta property="og:description" content="Zero-Shot Unsupervised and Text-Based Audio Editing Using DDPM Inversion" />
<!-- Tailwind CSS CDN (provides the utility classes used throughout the page) -->
<script src="https://cdn.tailwindcss.com"></script>
<!-- Google Fonts: preconnect to both font origins so the stylesheet and font files start loading sooner -->
<link rel="preconnect" href="https://fonts.googleapis.com" />
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
<link href="https://fonts.googleapis.com/css2?family=Poppins:wght@400;600;700&display=swap" rel="stylesheet" />
<!-- Font Awesome (the "fas" / "fab" icons) -->
<script src="https://kit.fontawesome.com/ad96f96272.js" crossorigin="anonymous"></script>
<!-- Academicons (the "ai" icons, e.g. the arXiv logo) -->
<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css" />
<style>
  /* Page-wide typography and dark gradient backdrop */
  body {
    font-family: 'Poppins', sans-serif;
    background: linear-gradient(135deg, #0f172a 0%, #1e293b 100%);
    color: #e0e7ff;
    min-height: 100vh;
  }

  /* Glassmorphism container: translucent panel that blurs whatever is behind it */
  .glass {
    background: rgba(30, 41, 59, 0.75);
    /* Prefixed form first for Safari versions that only know the -webkit- property */
    -webkit-backdrop-filter: blur(12px);
    backdrop-filter: blur(12px);
    border-radius: 1rem;
    border: 1px solid rgba(255, 255, 255, 0.1);
    box-shadow: 0 8px 32px 0 rgba(31, 38, 135, 0.37);
  }

  /* Gradient text: the background is clipped to the glyphs and the fill made transparent */
  .gradient-text {
    background: linear-gradient(90deg, #3b82f6, #8b5cf6);
    -webkit-background-clip: text;
    /* Standard property alongside the prefixed one */
    background-clip: text;
    -webkit-text-fill-color: transparent;
  }

  /* Scrollbar for tables (WebKit/Blink pseudo-elements only) */
  .scrollbar-thin::-webkit-scrollbar {
    height: 6px;
  }

  .scrollbar-thin::-webkit-scrollbar-thumb {
    background-color: #6366f1;
    border-radius: 10px;
  }

  /* Back to top button: the gradient direction flips on hover */
  #btn-back-to-top {
    background: linear-gradient(90deg, #3b82f6, #8b5cf6);
    box-shadow: 0 4px 15px rgba(59, 130, 246, 0.5);
  }

  #btn-back-to-top:hover {
    background: linear-gradient(90deg, #8b5cf6, #3b82f6);
  }
</style>
</head>
<body class="relative">
<!-- Navigation -->
<nav class="glass sticky top-0 z-50 shadow-lg" aria-label="Primary">
  <div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
    <div class="flex justify-between items-center h-16">
      <!-- Brand link: the empty fragment scrolls back to the top of the page -->
      <a href="#" class="text-2xl font-extrabold gradient-text tracking-tight">Zero-Shot Audio Editing</a>
      <!-- Desktop links (shown from the md breakpoint up) -->
      <!-- NOTE(review): only #supsamples has a matching id in the visible part of this file; confirm that #unsupsamples, #supcomparisons and #unsupcomparisons exist -->
      <div class="hidden md:flex space-x-8 text-gray-300 font-semibold">
        <a href="#supsamples" class="hover:text-indigo-400 transition">Text-based Editing</a>
        <a href="#unsupsamples" class="hover:text-indigo-400 transition">Unsup. Editing</a>
        <a href="#supcomparisons" class="hover:text-indigo-400 transition">Text-Based Editing Comp.</a>
        <a href="#unsupcomparisons" class="hover:text-indigo-400 transition">Unsup. Editing Comp.</a>
      </div>
      <!-- Icon-only toggle for the mobile menu: needs an explicit accessible name and a visible keyboard focus style -->
      <button type="button" id="mobile-menu-btn" class="md:hidden text-gray-300 hover:text-indigo-400 focus:outline-none focus-visible:ring-2 focus-visible:ring-indigo-400 rounded text-2xl" aria-label="Toggle navigation menu" aria-controls="mobile-menu">
        <i class="fas fa-bars" aria-hidden="true"></i>
      </button>
    </div>
    <!-- Mobile menu: starts hidden; the page script toggles the "hidden" class -->
    <div id="mobile-menu" class="hidden flex-col space-y-3 pb-4 text-gray-300 font-semibold md:hidden">
      <a href="#supsamples" class="block hover:text-indigo-400 transition">Text-based Editing</a>
      <a href="#unsupsamples" class="block hover:text-indigo-400 transition">Unsup. Editing</a>
      <a href="#supcomparisons" class="block hover:text-indigo-400 transition">Text-Based Editing Comp.</a>
      <a href="#unsupcomparisons" class="block hover:text-indigo-400 transition">Unsup. Editing Comp.</a>
    </div>
  </div>
</nav>
<!-- Main Content -->
<main class="max-w-5xl mx-auto px-4 sm:px-6 lg:px-8 py-10 space-y-12">
<!-- Header -->
<header class="text-center space-y-4">
  <h1 class="text-4xl md:text-5xl font-extrabold gradient-text leading-tight">
    Zero-Shot Unsupervised and Text-Based Audio Editing Using DDPM Inversion
  </h1>
  <!-- Venue line: a paragraph rather than an h4, so the outline does not jump from h1 to h4 -->
  <p class="text-xl font-semibold">ICML 2024</p>
  <!-- Authors -->
  <div class="flex flex-wrap justify-center gap-6 text-lg text-indigo-300">
    <a href="https://www.linkedin.com/in/hilamanor/" target="_blank" rel="noopener noreferrer" class="hover:text-indigo-400">Hila Manor</a>
    <!-- Purely visual separator -->
    <span aria-hidden="true">|</span>
    <a href="https://tomer.net.technion.ac.il/" target="_blank" rel="noopener noreferrer" class="hover:text-indigo-400">Tomer Michaeli</a>
  </div>
  <p class="text-indigo-200">Technion - Israel Institute of Technology</p>
  <!-- Resource links; each opens in a new tab, so rel="noopener noreferrer" keeps the new page from reaching window.opener -->
  <div class="flex flex-wrap justify-center gap-4 mt-4">
    <a href="https://arxiv.org/abs/2402.10009" target="_blank" rel="noopener noreferrer" class="inline-flex items-center gap-2 px-5 py-2 rounded-lg bg-indigo-600 hover:bg-indigo-700 transition shadow">
      <i class="ai ai-arxiv text-xl" aria-hidden="true"></i> ArXiv
    </a>
    <a href="https://github.com/HilaManor/AudioEditingCode/" target="_blank" rel="noopener noreferrer" class="inline-flex items-center gap-2 px-5 py-2 rounded-lg bg-indigo-600 hover:bg-indigo-700 transition shadow">
      <i class="fab fa-github text-xl" aria-hidden="true"></i> Code
    </a>
    <a href="https://youtu.be/lBnldOQVbS4" target="_blank" rel="noopener noreferrer" class="inline-flex items-center gap-2 px-5 py-2 rounded-lg bg-indigo-600 hover:bg-indigo-700 transition shadow">
      <i class="fab fa-youtube text-xl" aria-hidden="true"></i> Presentation
    </a>
    <a href="https://huggingface.co/spaces/hilamanor/audioEditing/" target="_blank" rel="noopener noreferrer" class="inline-flex items-center gap-2 px-5 py-2 rounded-lg bg-indigo-600 hover:bg-indigo-700 transition shadow">
      🤗 Text-Based Space
    </a>
  </div>
</header>
<!-- Abstract + Teaser -->
<section class="glass p-6 rounded-xl shadow-lg">
  <!-- Teaser figure shown above the abstract -->
  <img src="https://github.com/HilaManor/AudioEditingCode/raw/refs/heads/codeclean/docs/resources/teaser.png" alt="Teaser" decoding="async" class="mx-auto rounded-lg shadow-lg max-w-full h-auto mb-6" />
  <h2 class="text-2xl font-semibold mb-4 text-center">Abstract</h2>
  <p class="leading-relaxed text-indigo-100 max-w-3xl mx-auto text-center">
    Editing signals using large pre-trained models, in a zero-shot manner, has recently seen rapid advancements in the image domain. However, this wave has yet to reach the audio domain.
    In this paper, we explore two zero-shot editing techniques for audio signals, which use DDPM inversion on pre-trained diffusion models. The first, adopted from the image domain, allows text-based editing.
    The second is a novel approach for discovering semantically meaningful editing directions without supervision.
    When applied to music signals, this method exposes a range of musically interesting modifications, from controlling the participation of specific instruments to improvisations on the melody.
  </p>
</section>
<!-- Video Overview -->
<section class="text-center space-y-4">
  <h2 class="text-2xl font-semibold">Video Overview</h2>
  <!-- Credits for the imagery; both links open in a new tab, hence rel="noopener noreferrer" -->
  <p class="text-indigo-300 max-w-xl mx-auto">
    For people in a hurry. Images generated by <a href="https://openai.com/dall-e-2" target="_blank" rel="noopener noreferrer" class="text-indigo-400 hover:underline">DALL-E 2</a> and <a href="https://www.bing.com/copilot" target="_blank" rel="noopener noreferrer" class="text-indigo-400 hover:underline">Copilot</a>.
  </p>
  <!-- preload="metadata" asks the browser to fetch only metadata until playback is requested -->
  <video controls class="mx-auto rounded-lg shadow-lg max-w-full w-full sm:w-3/4 md:w-2/3" preload="metadata">
    <source src="https://github.com/HilaManor/AudioEditingCode/raw/refs/heads/codeclean/docs/resources/overview.mp4" type="video/mp4" />
    Your browser does not support the video tag.
  </video>
</section>
<!-- Samples of Editing -->
<section>
  <h2 class="text-3xl font-bold mb-6 text-center">1. Samples of Editing</h2>
  <p class="text-center text-indigo-300 mb-8 max-w-3xl mx-auto">
    We present samples of audio editing using our proposed methods. The samples are organized into two sections: text-based editing and unsupervised editing.
  </p>
  <!-- Text-Based Editing Table; the heading id is the target of the "Text-based Editing" nav links -->
  <h3 id="supsamples" class="text-2xl font-semibold mb-4">1.1. Samples of Text-Based Editing</h3>
  <!-- Wrapper lets the table scroll horizontally on narrow screens -->
  <div class="overflow-x-auto rounded-lg shadow-lg">
    <table class="min-w-full text-left text-indigo-100 border border-indigo-700 rounded-lg table-auto">
      <!-- Visually hidden caption so assistive technology can announce what the table contains -->
      <caption class="sr-only">Samples of text-based editing</caption>
      <thead class="bg-indigo-900/90">
        <tr>
          <th scope="col" class="px-3 py-2 border border-indigo-700">#</th>
          <th scope="col" class="px-3 py-2 border border-indigo-700">Source Prompt</th>
          <th scope="col" class="px-3 py-2 border border-indigo-700">Target Prompt</th>
          <th scope="col" class="px-3 py-2 border border-indigo-700">Original Audio</th>
          <th scope="col" class="px-3 py-2 border border-indigo-700">Edited Audio</th>
          <th scope="col" class="px-3 py-2 border border-indigo-700">Edit T<sub>start</sub></th>
        </tr>
      </thead>
      <tbody>
        <!-- The audio files are MP3, so each source declares audio/mpeg rather than audio/mp4 -->
        <tr class="hover:bg-indigo-700/30 transition">
          <td class="border border-indigo-700 px-3 py-2">1</td>
          <td class="border border-indigo-700 px-3 py-2">A recording of a <b>sneaky jazz</b> song.</td>
          <td class="border border-indigo-700 px-3 py-2">A recording of a <b>tense classical</b> music score.</td>
          <td class="border border-indigo-700 px-3 py-2">
            <audio controls preload="metadata" class="w-40 rounded">
              <source src="https://github.com/HilaManor/AudioEditingCode/raw/refs/heads/codeclean/docs/resources/audio/orig/MDDBFreeJazz.mp3" type="audio/mpeg" />
              Your browser does not support the audio element.
            </audio>
          </td>
          <td class="border border-indigo-700 px-3 py-2">
            <audio controls preload="metadata" class="w-40 rounded">
              <source src="https://github.com/HilaManor/AudioEditingCode/raw/refs/heads/codeclean/docs/resources/audio/sup_samples/orchestra_MDDBFreeJazz_ours_90.mp3" type="audio/mpeg" />
              Your browser does not support the audio element.
            </audio>
          </td>
          <!-- NOTE(review): the edited file name ends in "_90" while this cell shows 110 (row 2 matches: "_100" and 100); confirm which value is correct -->
          <td class="border border-indigo-700 px-3 py-2 text-center">110</td>
        </tr>
        <tr class="hover:bg-indigo-700/30 transition">
          <td class="border border-indigo-700 px-3 py-2">2</td>
          <td class="border border-indigo-700 px-3 py-2">A recording of a <b>hard rock</b> song.</td>
          <td class="border border-indigo-700 px-3 py-2">A recording of a <b>jazz</b> song.</td>
          <td class="border border-indigo-700 px-3 py-2">
            <audio controls preload="metadata" class="w-40 rounded">
              <source src="https://github.com/HilaManor/AudioEditingCode/raw/refs/heads/codeclean/docs/resources/audio/orig/MDDBZeppelin.mp3" type="audio/mpeg" />
              Your browser does not support the audio element.
            </audio>
          </td>
          <td class="border border-indigo-700 px-3 py-2">
            <audio controls preload="metadata" class="w-40 rounded">
              <source src="https://github.com/HilaManor/AudioEditingCode/raw/refs/heads/codeclean/docs/resources/audio/sup_samples/jazz_MDDBZeppelin_ours_100.mp3" type="audio/mpeg" />
              Your browser does not support the audio element.
            </audio>
          </td>
          <td class="border border-indigo-700 px-3 py-2 text-center">100</td>
        </tr>
        <!-- Add more rows as needed -->
      </tbody>
    </table>
  </div>
</section>
</main>
<!-- Back to Top Button -->
<!-- Starts hidden; the page script removes the "hidden" class once the page is scrolled down -->
<button type="button" id="btn-back-to-top" class="fixed bottom-8 right-8 p-4 rounded-full text-white shadow-lg hidden z-50" aria-label="Back to top">
  <!-- Decorative icon: the button's accessible name comes from aria-label -->
  <i class="fas fa-arrow-up" aria-hidden="true"></i>
</button>
<script>
// Mobile menu toggle: shows/hides #mobile-menu and mirrors the open/closed
// state in aria-expanded on the toggle button for assistive technology.
const menuBtn = document.getElementById('mobile-menu-btn');
const menu = document.getElementById('mobile-menu');
// Publish the initial state, derived from whether the panel currently carries "hidden".
menuBtn.setAttribute('aria-expanded', String(!menu.classList.contains('hidden')));
menuBtn.addEventListener('click', () => {
  // classList.toggle() returns true when "hidden" is now present, i.e. the menu is closed.
  const isHidden = menu.classList.toggle('hidden');
  menuBtn.setAttribute('aria-expanded', String(!isHidden));
});
// Back to top button: shown only while the page is scrolled more than 300px down
const backToTopBtn = document.getElementById('btn-back-to-top');
window.addEventListener('scroll', () => {
  const pastThreshold = window.scrollY > 300;
  // With a force argument, toggle() adds "hidden" when true and removes it when false.
  backToTopBtn.classList.toggle('hidden', !pastThreshold);
});
|