Spaces:
Running
Running
| <html lang="en" > | |
| <head> | |
| <!-- Character encoding and responsive viewport --> | |
| <meta charset="UTF-8" /> | |
| <meta name="viewport" content="width=device-width, initial-scale=1" /> | |
| <title>Zero-Shot Unsupervised and Text-Based Audio Editing Using DDPM Inversion</title> | |
| <!-- Favicon, loaded from the project's GitHub repository --> | |
| <link rel="shortcut icon" href="https://github.com/HilaManor/AudioEditingCode/raw/refs/heads/codeclean/docs/resources/favicon.ico" /> | |
| <!-- Open Graph metadata; og:title and og:description both repeat the page title --> | |
| <meta property="og:image" content="https://github.com/HilaManor/AudioEditingCode/raw/refs/heads/codeclean/docs/resources/teaser.png"/> | |
| <meta property="og:title" content="Zero-Shot Unsupervised and Text-Based Audio Editing Using DDPM Inversion" /> | |
| <meta property="og:description" content="Zero-Shot Unsupervised and Text-Based Audio Editing Using DDPM Inversion" /> | |
| <!-- Tailwind CSS CDN: supplies the utility classes used throughout the markup --> | |
| <script src="https://cdn.tailwindcss.com"></script> | |
| <!-- Google Fonts (Poppins): open connections to both font origins early so the stylesheet and font files start loading sooner --> | |
| <link rel="preconnect" href="https://fonts.googleapis.com" /> | |
| <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin /> | |
| <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@400;600;700&display=swap" rel="stylesheet" /> | |
| <!-- Font Awesome kit: supplies the fas/fab icon classes --> | |
| <script src="https://kit.fontawesome.com/ad96f96272.js" crossorigin="anonymous"></script> | |
| <!-- Academicons: supplies the ai/ai-arxiv icon classes --> | |
| <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css" /> | |
| <style> | |
| /* Page base: Poppins typeface, light indigo text on a dark diagonal gradient */ | |
| body { | |
| font-family: 'Poppins', sans-serif; | |
| background: linear-gradient(135deg, #0f172a 0%, #1e293b 100%); | |
| color: #e0e7ff; | |
| min-height: 100vh; | |
| } | |
| /* Glassmorphism container: translucent panel that blurs whatever is behind it */ | |
| .glass { | |
| background: rgba(30, 41, 59, 0.75); | |
| /* Prefixed form first for engines that only recognise it; the standard property follows */ | |
| -webkit-backdrop-filter: blur(12px); | |
| backdrop-filter: blur(12px); | |
| border-radius: 1rem; | |
| border: 1px solid rgba(255, 255, 255, 0.1); | |
| box-shadow: 0 8px 32px 0 rgba(31, 38, 135, 0.37); | |
| } | |
| /* Gradient text: the gradient background is clipped to the glyphs and the glyph fill is made transparent */ | |
| .gradient-text { | |
| background: linear-gradient(90deg, #3b82f6, #8b5cf6); | |
| -webkit-background-clip: text; | |
| /* Standard property alongside the prefixed one */ | |
| background-clip: text; | |
| -webkit-text-fill-color: transparent; | |
| } | |
| /* Scrollbar for tables (WebKit-style scrollbar pseudo-elements) */ | |
| .scrollbar-thin::-webkit-scrollbar { | |
| height: 6px; | |
| } | |
| .scrollbar-thin::-webkit-scrollbar-thumb { | |
| background-color: #6366f1; | |
| border-radius: 10px; | |
| } | |
| /* Back to top button: gradient fill that reverses direction on hover */ | |
| #btn-back-to-top { | |
| background: linear-gradient(90deg, #3b82f6, #8b5cf6); | |
| box-shadow: 0 4px 15px rgba(59, 130, 246, 0.5); | |
| } | |
| #btn-back-to-top:hover { | |
| background: linear-gradient(90deg, #8b5cf6, #3b82f6); | |
| } | |
| </style> | |
| </head> | |
| <body class="relative"> | |
| <!-- Navigation --> | |
| <nav class="glass sticky top-0 z-50 shadow-lg" aria-label="Primary"> | |
| <div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8"> | |
| <div class="flex justify-between items-center h-16"> | |
| <a href="#" class="text-2xl font-extrabold gradient-text tracking-tight">Zero-Shot Audio Editing</a> | |
| <!-- Desktop links: hidden below the md breakpoint --> | |
| <div class="hidden md:flex space-x-8 text-gray-300 font-semibold"> | |
| <a href="#supsamples" class="hover:text-indigo-400 transition">Text-based Editing</a> | |
| <a href="#unsupsamples" class="hover:text-indigo-400 transition">Unsup. Editing</a> | |
| <a href="#supcomparisons" class="hover:text-indigo-400 transition">Text-Based Editing Comp.</a> | |
| <a href="#unsupcomparisons" class="hover:text-indigo-400 transition">Unsup. Editing Comp.</a> | |
| </div> | |
| <!-- Icon-only toggle: given an explicit accessible name, an explicit type, and a visible keyboard focus ring in place of the removed outline --> | |
| <button id="mobile-menu-btn" type="button" class="md:hidden text-gray-300 hover:text-indigo-400 focus:outline-none focus-visible:ring-2 focus-visible:ring-indigo-400 rounded text-2xl" aria-label="Toggle navigation menu" aria-controls="mobile-menu"> | |
| <i class="fas fa-bars" aria-hidden="true"></i> | |
| </button> | |
| </div> | |
| <!-- Mobile menu: starts hidden; the page script toggles its `hidden` class --> | |
| <div id="mobile-menu" class="hidden flex-col space-y-3 pb-4 text-gray-300 font-semibold md:hidden"> | |
| <a href="#supsamples" class="block hover:text-indigo-400 transition">Text-based Editing</a> | |
| <a href="#unsupsamples" class="block hover:text-indigo-400 transition">Unsup. Editing</a> | |
| <a href="#supcomparisons" class="block hover:text-indigo-400 transition">Text-Based Editing Comp.</a> | |
| <a href="#unsupcomparisons" class="block hover:text-indigo-400 transition">Unsup. Editing Comp.</a> | |
| </div> | |
| </div> | |
| </nav> | |
| <!-- Main Content --> | |
| <main class="max-w-5xl mx-auto px-4 sm:px-6 lg:px-8 py-10 space-y-12"> | |
| <!-- Header --> | |
| <header class="text-center space-y-4"> | |
| <h1 class="text-4xl md:text-5xl font-extrabold gradient-text leading-tight"> | |
| Zero-Shot Unsupervised and Text-Based Audio Editing Using DDPM Inversion | |
| </h1> | |
| <!-- Venue line is a paragraph, not a heading: an h4 directly after the h1 would skip heading levels --> | |
| <p class="text-xl font-semibold">ICML 2024</p> | |
| <!-- Authors --> | |
| <div class="flex flex-wrap justify-center gap-6 text-lg text-indigo-300"> | |
| <a href="https://www.linkedin.com/in/hilamanor/" target="_blank" rel="noopener noreferrer" class="hover:text-indigo-400">Hila Manor</a> | |
| <span aria-hidden="true">|</span> | |
| <a href="https://tomer.net.technion.ac.il/" target="_blank" rel="noopener noreferrer" class="hover:text-indigo-400">Tomer Michaeli</a> | |
| </div> | |
| <p class="text-indigo-200">Technion - Israel Institute of Technology</p> | |
| <!-- Resource links; each opens in a new tab, and the icons are decorative because the link text names the destination --> | |
| <div class="flex flex-wrap justify-center gap-4 mt-4"> | |
| <a href="https://arxiv.org/abs/2402.10009" target="_blank" rel="noopener noreferrer" class="inline-flex items-center gap-2 px-5 py-2 rounded-lg bg-indigo-600 hover:bg-indigo-700 transition shadow"> | |
| <i class="ai ai-arxiv text-xl" aria-hidden="true"></i> ArXiv | |
| </a> | |
| <a href="https://github.com/HilaManor/AudioEditingCode/" target="_blank" rel="noopener noreferrer" class="inline-flex items-center gap-2 px-5 py-2 rounded-lg bg-indigo-600 hover:bg-indigo-700 transition shadow"> | |
| <i class="fab fa-github text-xl" aria-hidden="true"></i> Code | |
| </a> | |
| <a href="https://youtu.be/lBnldOQVbS4" target="_blank" rel="noopener noreferrer" class="inline-flex items-center gap-2 px-5 py-2 rounded-lg bg-indigo-600 hover:bg-indigo-700 transition shadow"> | |
| <i class="fab fa-youtube text-xl" aria-hidden="true"></i> Presentation | |
| </a> | |
| <a href="https://huggingface.co/spaces/hilamanor/audioEditing/" target="_blank" rel="noopener noreferrer" class="inline-flex items-center gap-2 px-5 py-2 rounded-lg bg-indigo-600 hover:bg-indigo-700 transition shadow"> | |
| 🤗 Text-Based Space | |
| </a> | |
| </div> | |
| </header> | |
| <!-- Abstract + Teaser --> | |
| <section class="glass p-6 rounded-xl shadow-lg"> | |
| <img src="https://github.com/HilaManor/AudioEditingCode/raw/refs/heads/codeclean/docs/resources/teaser.png" alt="Teaser" class="mx-auto rounded-lg shadow-lg max-w-full h-auto mb-6" /> | |
| <h2 class="text-2xl font-semibold mb-4 text-center">Abstract</h2> | |
| <p class="leading-relaxed text-indigo-100 max-w-3xl mx-auto text-center"> | |
| Editing signals using large pre-trained models, in a zero-shot manner, has recently seen rapid advancements in the image domain. However, this wave has yet to reach the audio domain. | |
| In this paper, we explore two zero-shot editing techniques for audio signals, which use DDPM inversion on pre-trained diffusion models. The first, adopted from the image domain, allows text-based editing. | |
| The second is a novel approach for discovering semantically meaningful editing directions without supervision. | |
| When applied to music signals, this method exposes a range of musically interesting modifications, from controlling the participation of specific instruments to improvisations on the melody. | |
| </p> | |
| </section> | |
| <!-- Video Overview --> | |
| <section class="text-center space-y-4"> | |
| <h2 class="text-2xl font-semibold">Video Overview</h2> | |
| <!-- Credits link to third-party sites in a new tab, so rel="noopener noreferrer" is set explicitly --> | |
| <p class="text-indigo-300 max-w-xl mx-auto"> | |
| For people in a hurry. Images generated by <a href="https://openai.com/dall-e-2" target="_blank" rel="noopener noreferrer" class="text-indigo-400 hover:underline">DALL-E 2</a> and <a href="https://www.bing.com/copilot" target="_blank" rel="noopener noreferrer" class="text-indigo-400 hover:underline">Copilot</a>. | |
| </p> | |
| <!-- preload="metadata" avoids downloading the whole file before the user presses play --> | |
| <!-- NOTE(review): no captions track is provided for this video; add a <track kind="captions"> if a captions file exists --> | |
| <video controls class="mx-auto rounded-lg shadow-lg max-w-full w-full sm:w-3/4 md:w-2/3" preload="metadata"> | |
| <source src="https://github.com/HilaManor/AudioEditingCode/raw/refs/heads/codeclean/docs/resources/overview.mp4" type="video/mp4" /> | |
| Your browser does not support the video tag. | |
| </video> | |
| </section> | |
| <!-- Samples of Editing --> | |
| <section> | |
| <h2 class="text-3xl font-bold mb-6 text-center">1. Samples of Editing</h2> | |
| <p class="text-center text-indigo-300 mb-8 max-w-3xl mx-auto"> | |
| We present samples of audio editing using our proposed methods. The samples are organized into two sections: text-based editing and unsupervised editing. | |
| </p> | |
| <!-- Text-Based Editing Table --> | |
| <!-- scroll-mt-20 keeps this anchor target clear of the sticky 4rem-high navigation bar when jumped to via #supsamples --> | |
| <h3 id="supsamples" class="text-2xl font-semibold mb-4 scroll-mt-20">1.1. Samples of Text-Based Editing</h3> | |
| <div class="overflow-x-auto rounded-lg shadow-lg"> | |
| <table class="min-w-full text-left text-indigo-100 border border-indigo-700 rounded-lg table-auto"> | |
| <!-- Column headers; scope="col" ties each header to the cells below it for assistive technology --> | |
| <thead class="bg-indigo-900/90"> | |
| <tr> | |
| <th scope="col" class="px-3 py-2 border border-indigo-700">#</th> | |
| <th scope="col" class="px-3 py-2 border border-indigo-700">Source Prompt</th> | |
| <th scope="col" class="px-3 py-2 border border-indigo-700">Target Prompt</th> | |
| <th scope="col" class="px-3 py-2 border border-indigo-700">Original Audio</th> | |
| <th scope="col" class="px-3 py-2 border border-indigo-700">Edited Audio</th> | |
| <th scope="col" class="px-3 py-2 border border-indigo-700">Edit T<sub>start</sub></th> | |
| </tr> | |
| </thead> | |
| <tbody> | |
| <!-- Sample 1: jazz to classical. The sources are .mp3 files, so they are declared as audio/mpeg --> | |
| <tr class="hover:bg-indigo-700/30 transition"> | |
| <td class="border border-indigo-700 px-3 py-2">1</td> | |
| <td class="border border-indigo-700 px-3 py-2">A recording of a <b>sneaky jazz</b> song.</td> | |
| <td class="border border-indigo-700 px-3 py-2">A recording of a <b>tense classical</b> music score.</td> | |
| <td class="border border-indigo-700 px-3 py-2"> | |
| <audio controls preload="metadata" class="w-40 rounded" aria-label="Sample 1, original audio"> | |
| <source src="https://github.com/HilaManor/AudioEditingCode/raw/refs/heads/codeclean/docs/resources/audio/orig/MDDBFreeJazz.mp3" type="audio/mpeg" /> | |
| Your browser does not support the audio element. | |
| </audio> | |
| </td> | |
| <td class="border border-indigo-700 px-3 py-2"> | |
| <audio controls preload="metadata" class="w-40 rounded" aria-label="Sample 1, edited audio"> | |
| <source src="https://github.com/HilaManor/AudioEditingCode/raw/refs/heads/codeclean/docs/resources/audio/sup_samples/orchestra_MDDBFreeJazz_ours_90.mp3" type="audio/mpeg" /> | |
| Your browser does not support the audio element. | |
| </audio> | |
| </td> | |
| <!-- NOTE(review): the edited file name ends in "_ours_90" while this cell shows 110; confirm which value is correct --> | |
| <td class="border border-indigo-700 px-3 py-2 text-center">110</td> | |
| </tr> | |
| <!-- Sample 2: hard rock to jazz. The sources are .mp3 files, so they are declared as audio/mpeg --> | |
| <tr class="hover:bg-indigo-700/30 transition"> | |
| <td class="border border-indigo-700 px-3 py-2">2</td> | |
| <td class="border border-indigo-700 px-3 py-2">A recording of a <b>hard rock</b> song.</td> | |
| <td class="border border-indigo-700 px-3 py-2">A recording of a <b>jazz</b> song.</td> | |
| <td class="border border-indigo-700 px-3 py-2"> | |
| <audio controls preload="metadata" class="w-40 rounded" aria-label="Sample 2, original audio"> | |
| <source src="https://github.com/HilaManor/AudioEditingCode/raw/refs/heads/codeclean/docs/resources/audio/orig/MDDBZeppelin.mp3" type="audio/mpeg" /> | |
| Your browser does not support the audio element. | |
| </audio> | |
| </td> | |
| <td class="border border-indigo-700 px-3 py-2"> | |
| <audio controls preload="metadata" class="w-40 rounded" aria-label="Sample 2, edited audio"> | |
| <source src="https://github.com/HilaManor/AudioEditingCode/raw/refs/heads/codeclean/docs/resources/audio/sup_samples/jazz_MDDBZeppelin_ours_100.mp3" type="audio/mpeg" /> | |
| Your browser does not support the audio element. | |
| </audio> | |
| </td> | |
| <td class="border border-indigo-700 px-3 py-2 text-center">100</td> | |
| </tr> | |
| <!-- Add more rows as needed --> | |
| </tbody> | |
| </table> | |
| </div> | |
| </section> | |
| </main> | |
| <!-- Back to Top Button --> | |
| <!-- Starts hidden; the page script removes `hidden` once the page is scrolled. The icon is decorative because aria-label names the button --> | |
| <button id="btn-back-to-top" type="button" class="fixed bottom-8 right-8 p-4 rounded-full text-white shadow-lg hidden z-50" aria-label="Back to top"> | |
| <i class="fas fa-arrow-up" aria-hidden="true"></i> | |
| </button> | |
| <script> | |
| // Mobile menu toggle | |
| const menuBtn = document.getElementById('mobile-menu-btn'); | |
| const menu = document.getElementById('mobile-menu'); | |
| menuBtn.addEventListener('click', () => { | |
| menu.classList.toggle('hidden'); | |
| }); | |
| // Back to top button visibility and scroll | |
| const backToTopBtn = document.getElementById('btn-back-to-top'); | |
| window.addEventListener('scroll', () => { | |
| if (window.scrollY > 300) { | |
| backToTopBtn.classList.remove('hidden'); | |
| } else { | |
| backToTopBtn.classList.add('hidden'); | |
| } | |
| }); | |