Spaces:
				
			
			
	
			
			
		Running
		
			on 
			
			Zero
	
	
	
			
			
	
	
	
	
		
		
		Running
		
			on 
			
			Zero
	| # Copyright 2024 The YourMT3 Authors. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Please see the details in the LICENSE file. | |
| """ install_dataset.py """ | |
| import os | |
| import argparse | |
| import mirdata | |
| from typing import Optional, Tuple, Union | |
| from utils.preprocess.generate_dataset_stats import generate_dataset_stats_for_all_datasets, update_dataset_stats_for_new_dataset | |
| from utils.mirdata_dev.datasets import slakh16k | |
| from utils.preprocess.preprocess_slakh import preprocess_slakh16k, add_program_and_is_drum_info_to_file_list | |
| from utils.preprocess.preprocess_musicnet import preprocess_musicnet16k | |
| from utils.preprocess.preprocess_maps import preprocess_maps16k | |
| from utils.preprocess.preprocess_maestro import preprocess_maestro16k | |
| from utils.preprocess.preprocess_guitarset import preprocess_guitarset16k, create_filelist_by_style_guitarset16k | |
| from utils.preprocess.preprocess_enstdrums import preprocess_enstdrums16k, create_filelist_dtm_random_enstdrums16k | |
| from utils.preprocess.preprocess_mir_st500 import preprocess_mir_st500_16k | |
| from utils.preprocess.preprocess_cmedia import preprocess_cmedia_16k | |
| from utils.preprocess.preprocess_rwc_pop_full import preprocess_rwc_pop_full16k | |
| from utils.preprocess.preprocess_rwc_pop import preprocess_rwc_pop16k | |
| from utils.preprocess.preprocess_egmd import preprocess_egmd16k | |
| from utils.preprocess.preprocess_mir1k import preprocess_mir1k_16k | |
| from utils.preprocess.preprocess_urmp import preprocess_urmp16k | |
| from utils.preprocess.preprocess_idmt_smt_bass import preprocess_idmt_smt_bass_16k | |
| from utils.preprocess.preprocess_geerdes import preprocess_geerdes16k | |
| from utils.utils import download_and_extract #, download_and_extract_zenodo_restricted | |
| # zenodo_token = "eyJhbGciOiJIUzUxMiIsImlhdCI6MTcxMDE1MDYzNywiZXhwIjoxNzEyNzA3MTk5fQ.eyJpZCI6ImRmODA5NzZlLTBjM2QtNDk5NS05YjM0LWFiNGM4NzJhMmZhMSIsImRhdGEiOnt9LCJyYW5kb20iOiIwMzY5ZDcxZjc2NTMyN2UyYmVmN2ExYjJkMmMyYTRhNSJ9.0aHnNC-7ivWQO6l8twjLR0NDH4boC0uOolAAmogVt7XRi2PHU5MEKBQoK7-wgDdnmWEIqEIvoLO6p8KTnsY9dg" | |
def install_slakh(data_home=os.PathLike, no_down=False) -> None:
    """Optionally download, then preprocess, the Slakh (16 kHz) dataset.

    Args:
        data_home: Data home directory handed to the dataset loader and to
            the preprocessing helpers.
            NOTE(review): the default value is the ``os.PathLike`` class
            itself, which looks like an intended type annotation -- confirm
            with callers before changing the signature.
        no_down: When truthy, the download step is skipped and only the
            preprocessing steps run.
    """
    version = '2100-yourmt3-16k'
    if not no_down:
        dataset = slakh16k.Dataset(data_home, version=version)
        dataset.download(partial_download=[version, 'index'])
        # Drop the loader object before preprocessing starts.
        del dataset
    preprocess_slakh16k(data_home, delete_source_files=False, fix_bass_octave=True)
    add_program_and_is_drum_info_to_file_list(data_home)
def install_musicnet(data_home=os.PathLike, no_down=False) -> None:
    """Optionally download, then preprocess, the MusicNet (16 kHz) dataset.

    Args:
        data_home: Data home directory forwarded to ``download_and_extract``
            and ``preprocess_musicnet16k``.
            NOTE(review): the default value is the ``os.PathLike`` class
            itself, which looks like an intended type annotation -- confirm
            with callers before changing the signature.
        no_down: When truthy, the archive is not downloaded.
    """
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/record/7811639/files/musicnet_yourmt3_16k.tar.gz?download=1",
            remove_tar_file=True,
            check_sum="a2da7c169e26d452a4e8b9bef498b3d7",
        )
    preprocess_musicnet16k(data_home, dataset_name='musicnet')
def install_maps(data_home=os.PathLike, no_down=False, sanity_check=False) -> None:
    """Optionally download, then preprocess, the MAPS (16 kHz) dataset.

    Args:
        data_home: Data home directory forwarded to ``download_and_extract``
            and ``preprocess_maps16k``.
            NOTE(review): the default value is the ``os.PathLike`` class
            itself, which looks like an intended type annotation -- confirm
            with callers before changing the signature.
        no_down: When truthy, the archive is not downloaded.
        sanity_check: Forwarded unchanged to ``preprocess_maps16k``.
    """
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/record/7812075/files/maps_yourmt3_16k.tar.gz?download=1",
            remove_tar_file=True,
            check_sum="6b070d162c931cd5e69c16ef2398a649",
        )
    preprocess_maps16k(
        data_home,
        dataset_name='maps',
        ignore_pedal=False,
        sanity_check=sanity_check,
    )
def install_maestro(data_home=os.PathLike, no_down=False, sanity_check=False) -> None:
    """Optionally download, then preprocess, the Maestro (16 kHz) dataset.

    Args:
        data_home: Data home directory forwarded to ``download_and_extract``
            and ``preprocess_maestro16k``.
            NOTE(review): the default value is the ``os.PathLike`` class
            itself, which looks like an intended type annotation -- confirm
            with callers before changing the signature.
        no_down: When truthy, the archive is not downloaded.
        sanity_check: Forwarded unchanged to ``preprocess_maestro16k``.
    """
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/record/7852176/files/maestro_yourmt3_16k.tar.gz?download=1",
            remove_tar_file=True,
            check_sum="c17c6a188d936e5ff3870ef27144d397",
        )
    preprocess_maestro16k(
        data_home,
        dataset_name='maestro',
        ignore_pedal=False,
        sanity_check=sanity_check,
    )
def install_guitarset(data_home=os.PathLike, no_down=False) -> None:
    """Optionally download GuitarSet (16 kHz), preprocess it and build file lists.

    Args:
        data_home: Data home directory forwarded to ``download_and_extract``
            and to both GuitarSet helpers.
            NOTE(review): the default value is the ``os.PathLike`` class
            itself, which looks like an intended type annotation -- confirm
            with callers before changing the signature.
        no_down: When truthy, the archive is not downloaded.
    """
    name = 'guitarset'
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/record/7831843/files/guitarset_yourmt3_16k.tar.gz?download=1",
            remove_tar_file=True,
            check_sum="e3cfe0cc9394d91d9c290ce888821360",
        )
    preprocess_guitarset16k(data_home, dataset_name=name)
    create_filelist_by_style_guitarset16k(data_home, dataset_name=name)
def install_enstdrums(data_home, no_down=False) -> None:
    """Optionally download ENST-drums (16 kHz), preprocess it and build file lists.

    Args:
        data_home: Data home directory forwarded to ``download_and_extract``
            and to both ENST-drums helpers.
        no_down: When truthy, the archive is not downloaded.
    """
    name = 'enstdrums'
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/record/7831843/files/enstdrums_yourmt3_16k.tar.gz?download=1",
            remove_tar_file=True,
            check_sum="7e28c2a923e4f4162b3d83877cedb5eb",
        )
    preprocess_enstdrums16k(data_home, dataset_name=name)
    create_filelist_dtm_random_enstdrums16k(data_home, dataset_name=name)
def install_egmd(data_home, no_down=False) -> None:
    """Optionally download, then preprocess, the EGMD (16 kHz) dataset.

    Args:
        data_home: Data home directory forwarded to ``download_and_extract``
            and ``preprocess_egmd16k``.
        no_down: When truthy, the archive is not downloaded.
    """
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/record/7831072/files/egmc_yourmt3_16k.tar.gz?download=1",
            remove_tar_file=True,
            check_sum="4f615157ea4c52a64c6c9dcf68bf2bde",
        )
    preprocess_egmd16k(data_home, dataset_name='egmd')
def install_mirst500(data_home, zenodo_token, no_down=False, sanity_check=True, apply_correction=False) -> None:
    """Optionally download, then preprocess, MIR-ST500 (16 kHz).

    Update Oct 2023: MIR-ST500 with FULL audio files.

    Args:
        data_home: Data home directory forwarded to ``download_and_extract``
            and ``preprocess_mir_st500_16k``.
        zenodo_token: Access token forwarded to ``download_and_extract``.
        no_down: When truthy, the archive is not downloaded.
        sanity_check: Forwarded unchanged to ``preprocess_mir_st500_16k``.
        apply_correction: Accepted but not used anywhere in this function.
    """
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/records/10016397/files/mir_st500_yourmt3_16k.tar.gz?download=1",
            check_sum="98eb52eb2456ce4034e21750f309da13",
            zenodo_token=zenodo_token,
        )
    preprocess_mir_st500_16k(data_home, dataset_name='mir_st500', sanity_check=sanity_check)
def install_cmedia(data_home, zenodo_token, no_down=False, sanity_check=True) -> None:
    """Optionally download, then preprocess, the CMedia (16 kHz) dataset.

    Args:
        data_home: Data home directory forwarded to ``download_and_extract``
            and ``preprocess_cmedia_16k``.
        zenodo_token: Access token forwarded to ``download_and_extract``.
        no_down: When truthy, the archive is not downloaded.
        sanity_check: Forwarded unchanged to ``preprocess_cmedia_16k``.
    """
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/records/10016397/files/cmedia_yourmt3_16k.tar.gz?download=1",
            check_sum="e6cca23577ba7588e9ed9711a398f7cf",
            zenodo_token=zenodo_token,
        )
    # Preprocessing is always run with apply_correction=True.
    preprocess_cmedia_16k(
        data_home,
        dataset_name='cmedia',
        sanity_check=sanity_check,
        apply_correction=True,
    )
def install_rwc_pop(data_home, zenodo_token, no_down=False) -> None:
    """Optionally download RWC-Pop (16 kHz), then run both preprocessing passes.

    Args:
        data_home: Data home directory forwarded to ``download_and_extract``
            and to both preprocessing helpers.
        zenodo_token: Access token forwarded to ``download_and_extract``.
        no_down: When truthy, the archive is not downloaded.
    """
    name = 'rwc_pop'
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/records/10016397/files/rwc_pop_yourmt3_16k.tar.gz?download=1",
            check_sum="ad459f9fa1b6b87676b2fb37c0ba5dfc",
            zenodo_token=zenodo_token,
        )
    # First pass: bass transcriptions.
    preprocess_rwc_pop16k(data_home, dataset_name=name)
    # Second pass: full transcriptions.
    preprocess_rwc_pop_full16k(data_home, dataset_name=name)
def install_mir1k(data_home, no_down=False) -> None:
    """Optionally download the MIR-1K (16 kHz) archive.

    No preprocessing is performed: the ``preprocess_mir1k_16k`` call is
    disabled in this function.

    Args:
        data_home: Data home directory forwarded to ``download_and_extract``.
        no_down: When truthy, nothing is downloaded (the function then does
            nothing at all).
    """
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/record/7955481/files/mir1k_yourmt3_16k.tar.gz?download=1",
            remove_tar_file=True,
            check_sum="4cbac56a4e971432ca807efd5cb76d67",
        )
    # preprocess_mir1k_16k(data_home, dataset_name='mir1k')
def install_urmp(data_home, no_down=False) -> None:
    """Optionally download, then preprocess, the URMP (16 kHz) dataset.

    Args:
        data_home: Data home directory forwarded to ``download_and_extract``
            and ``preprocess_urmp16k``.
        no_down: When truthy, the archive is not downloaded.
    """
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/record/8021437/files/urmp_yourmt3_16k.tar.gz?download=1",
            remove_tar_file=True,
            check_sum="4f539c71678a77ba34f6dfca41072102",
        )
    preprocess_urmp16k(data_home, dataset_name='urmp')
def install_idmt_smt_bass(data_home, no_down=False) -> None:
    """Optionally download, then preprocess, IDMT-SMT-Bass (16 kHz).

    Args:
        data_home: Data home directory forwarded to ``download_and_extract``
            and ``preprocess_idmt_smt_bass_16k``.
        no_down: When truthy, the archive is not downloaded.
    """
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/records/10009959/files/idmt_smt_bass_yourmt3_16k.tar.gz?download=1",
            remove_tar_file=True,
            check_sum="0c95f91926a1e95b1f5d075c05b7eb76",
        )
    preprocess_idmt_smt_bass_16k(
        data_home,
        dataset_name='idmt_smt_bass',
        sanity_check=True,
        edit_audio=False,  # the downloaded audio has already been edited
    )
def install_random_nsynth(data_home, no_down=False) -> None:
    """Placeholder installer for Random-NSynth; performs no work.

    Args:
        data_home: Ignored.
        no_down: Ignored.
    """
    return None
def install_geerdes(data_home) -> None:
    """Preprocess the Geerdes dataset, reporting any failure instead of raising.

    There is no download step in this function. Any exception raised by the
    preprocessing call is printed, followed by a notice telling the user to
    contact the dataset provider; the exception is not re-raised.

    Args:
        data_home: Data home directory forwarded to ``preprocess_geerdes16k``.
    """
    try:
        preprocess_geerdes16k(data_home, dataset_name='geerdes', sanity_check=False)
    except Exception as err:
        # Deliberate best-effort: show the error, then the guidance line.
        notice = "Geerdes dataset is not available for download. Please contact the dataset provider."
        print(err, notice, sep="\n")
def regenerate_dataset_stats(data_home, no_down=False) -> None:
    """Regenerate the dataset statistics for every dataset under ``data_home``.

    Args:
        data_home: Data home directory forwarded to
            ``generate_dataset_stats_for_all_datasets``.
        no_down: Accepted and ignored, so this function can be called with
            the same ``no_down=...`` keyword as the ``install_*`` functions
            without raising ``TypeError``.
    """
    generate_dataset_stats_for_all_datasets(data_home)
def get_cached_zenodo_token() -> str:
    """Return the Zenodo token stored in the ``.cached_zenodo_token`` file.

    The file is looked up relative to the current working directory.

    Returns:
        The file content with leading/trailing whitespace stripped.

    Raises:
        FileNotFoundError: If the cache file does not exist. This is a
            subclass of ``Exception``, so callers catching ``Exception``
            keep working.
    """
    try:
        with open('.cached_zenodo_token', 'r') as f:
            zenodo_token = f.read().strip()
    except FileNotFoundError:
        raise FileNotFoundError(
            "Cached Zenodo token not found. Please enter your Zenodo token.") from None
    # The token is a credential: confirm it is being used without echoing it
    # to the terminal (and to any captured logs).
    print("Using cached Zenodo token.")
    return zenodo_token
def cache_zenodo_token(zenodo_token: str) -> None:
    """Store ``zenodo_token`` in the ``.cached_zenodo_token`` file.

    The file is created (or overwritten) relative to the current working
    directory, and a confirmation line is printed afterwards.

    Args:
        zenodo_token: Token text written to the file as-is.
    """
    cache_path = '.cached_zenodo_token'
    with open(cache_path, mode='w') as cache_file:
        cache_file.write(zenodo_token)
    print("Your Zenodo token is cached.")
| def option_prompt(data_home: os.PathLike, no_download: bool = False) -> None: | |
| print("Select the dataset(s) to install (enter comma-separated numbers):") | |
| print("1. Slakh") | |
| print("2. MusicNet") | |
| print("3. MAPS") | |
| print("4. Maestro") | |
| print("5. GuitarSet") | |
| print("6. ENST-drums") | |
| print("7. EGMD") | |
| print("8. MIR-ST500 ** Restricted Access **") | |
| print("9. CMedia ** Restricted Access **") | |
| print("10. RWC-Pop (Bass and Full) ** Restricted Access **") | |
| print("11. MIR-1K (NOT SUPPORTED)") | |
| print("12. URMP") | |
| print("13. IDMT-SMT-Bass") | |
| print("14. Random-NSynth") | |
| print("15. Geerdes") | |
| print("16. Regenerate Dataset Stats (experimental)") | |
| print("17. Request Token for ** Restricted Access **") | |
| print("18. Exit") | |
| choice = input("Enter your choices (multiple choices with comma): ") | |
| choices = [c.strip() for c in choice.split(',')] | |
| if "18" in choices: | |
| print("Exiting.") | |
| else: | |
| # ask for Zenodo token | |
| for c in choices: | |
| if int(c) in [8, 9, 10]: | |
| if no_download is True: | |
| zenodo_token = None | |
| else: | |
| zenodo_token = input("Enter Zenodo token, or press enter to use the cached token:") | |
| if zenodo_token == "": | |
| zenodo_token = get_cached_zenodo_token() | |
| else: | |
| cache_zenodo_token(zenodo_token) | |
| break | |
| if "1" in choices: | |
| install_slakh(data_home, no_down=no_download) | |
| if "2" in choices: | |
| install_musicnet(data_home, no_down=no_download) | |
| if "3" in choices: | |
| install_maps(data_home, no_down=no_download) | |
| if "4" in choices: | |
| install_maestro(data_home, no_down=no_download) | |
| if "5" in choices: | |
| install_guitarset(data_home, no_down=no_download) | |
| if "6" in choices: | |
| install_enstdrums(data_home, no_down=no_download) | |
| if "7" in choices: | |
| install_egmd(data_home, no_down=no_download) | |
| if "8" in choices: | |
| install_mirst500(data_home, zenodo_token, no_down=no_download) | |
| if "9" in choices: | |
| install_cmedia(data_home, zenodo_token, no_down=no_download) | |
| if "10" in choices: | |
| install_rwc_pop(data_home, zenodo_token, no_down=no_download) | |
| if "11" in choices: | |
| install_mir1k(data_home, no_down=no_download) | |
| if "12" in choices: | |
| install_urmp(data_home, no_down=no_download) | |
| if "13" in choices: | |
| install_idmt_smt_bass(data_home, no_down=no_download) | |
| if "14" in choices: | |
| install_random_nsynth(data_home, no_down=no_download) | |
| if "15" in choices: | |
| install_geerdes(data_home) # not available for download | |
| if "16" in choices: | |
| regenerate_dataset_stats(data_home, no_down=no_download) | |
| if "17" in choices: | |
| print("\nPlease visit https://zenodo.org/records/10016397 to request a Zenodo token.") | |
| print("Upon submitting your request, you will receive an email with a link labeled 'Access the record'.") | |
| print("Copy the token that follows 'token=' in that link.") | |
| if not any(int(c) in range(16) for c in choices): | |
| print("Invalid choice(s). Please enter valid numbers separated by commas.") | |
if __name__ == "__main__":
    # Command-line entry point: resolve the data home directory, make sure it
    # exists, then hand over to the interactive menu.
    arg_parser = argparse.ArgumentParser(description='Dataset installer script.')
    # Optional positional: data home directory.
    arg_parser.add_argument(
        'data_home',
        type=str,
        nargs='?',
        default=None,
        help='Path to data home directory. If None, use the default path defined in src/config/config.py',
    )
    # Switch that disables every download step.
    arg_parser.add_argument(
        '--nodown',
        '-nd',
        action='store_true',
        help='Flag to control downloading. If set, no downloading will occur.',
    )
    cli_args = arg_parser.parse_args()

    if cli_args.data_home is not None:
        data_home = cli_args.data_home
    else:
        # Imported lazily: the project config is only needed for the fallback.
        from config.config import shared_cfg
        data_home = shared_cfg["PATH"]["data_home"]

    os.makedirs(data_home, exist_ok=True)
    no_download = cli_args.nodown
    option_prompt(data_home, no_download)
