Spaces:

PFEemp2024
/

DCWIR-Demo

Runtime error

App Files Files Community

DCWIR-Demo/textattack/shared/utils/install.py

PFEemp2024

add necessary file

63775f2 over 1 year ago

raw

history blame

7.37 kB

	import logging.config
	import os
	import pathlib
	import shutil
	import sys
	import tempfile
	import zipfile

	import filelock
	import requests
	import tqdm

	# Hide an error message from `tokenizers` if this process is forked.
	# NOTE(review): this runs at import time and overwrites any value of
	# TOKENIZERS_PARALLELISM that is already present in the environment.
	os.environ["TOKENIZERS_PARALLELISM"] = "True"


	def path_in_cache(file_path):
	    """Return ``file_path`` joined onto the TextAttack cache directory.

	    The cache directory itself is created first when it is missing.

	    Args:
	        file_path (str): path relative to ``TEXTATTACK_CACHE_DIR``.

	    Returns:
	        str: ``TEXTATTACK_CACHE_DIR`` joined with ``file_path``.
	    """
	    cache_root = TEXTATTACK_CACHE_DIR
	    try:
	        os.makedirs(cache_root)
	    except FileExistsError:
	        # The cache directory is already on disk; nothing to create.
	        pass
	    return os.path.join(cache_root, file_path)


	def s3_url(uri):
	    """Return the URL of ``uri`` inside the TextAttack S3 bucket.

	    Args:
	        uri (str): object key, relative to the bucket root.

	    Returns:
	        str: the bucket root URL with ``uri`` appended.
	    """
	    bucket_root = "https://textattack.s3.amazonaws.com/"
	    return bucket_root + uri


	def download_from_s3(folder_name, skip_if_cached=True):
	    """Download ``folder_name`` from the TextAttack S3 bucket into the cache.

	    The result is stored at ``<TEXTATTACK_CACHE_DIR>/<folder_name>``. The
	    content at ``s3_url(folder_name)`` is first written to a temporary file
	    inside the cache directory. If that file is a zip archive it is
	    extracted with ``unzip_file``; otherwise it is copied to the
	    destination path unchanged.

	    Args:
	        folder_name (str): path to folder or file in cache
	        skip_if_cached (bool): If `True`, skip downloading if content is already cached.

	    Returns:
	        str: path to the downloaded folder or file on disk

	    Raises:
	        Exception: propagated from ``http_get`` when the download fails.
	    """
	    cache_dest_path = path_in_cache(folder_name)
	    os.makedirs(os.path.dirname(cache_dest_path), exist_ok=True)
	    # Use a lock to prevent concurrent downloads. The context manager
	    # guarantees the lock is released even when the download fails.
	    with filelock.FileLock(cache_dest_path + ".lock"):
	        # Check if already downloaded.
	        if skip_if_cached and os.path.exists(cache_dest_path):
	            return cache_dest_path
	        # If the file isn't found yet, download the zip file to the cache.
	        downloaded_file = tempfile.NamedTemporaryFile(
	            dir=TEXTATTACK_CACHE_DIR, suffix=".zip", delete=False
	        )
	        try:
	            # Leaving this `with` closes the file so it can be reopened
	            # by name below.
	            with downloaded_file:
	                http_get(s3_url(folder_name), downloaded_file)
	            # Move or unzip the file.
	            if zipfile.is_zipfile(downloaded_file.name):
	                unzip_file(downloaded_file.name, cache_dest_path)
	            else:
	                logger.info(f"Copying {downloaded_file.name} to {cache_dest_path}.")
	                shutil.copyfile(downloaded_file.name, cache_dest_path)
	        finally:
	            # Remove the temporary file, including after a failed download,
	            # so partial downloads do not pile up in the cache directory.
	            os.remove(downloaded_file.name)
	    logger.info(f"Successfully saved {folder_name} to cache.")
	    return cache_dest_path


	def download_from_url(url, save_path, skip_if_cached=True):
	    """Download ``url`` into the TextAttack cache.

	    The result is stored at ``<TEXTATTACK_CACHE_DIR>/<save_path>``. The
	    content at ``url`` is first written to a temporary file inside the
	    cache directory. If that file is a zip archive it is extracted with
	    ``unzip_file``; otherwise it is copied to the destination path
	    unchanged.

	    Args:
	        url (str): URL path from which to download.
	        save_path (str): path to which to save the downloaded content.
	        skip_if_cached (bool): If `True`, skip downloading if content is already cached.

	    Returns:
	        str: path to the downloaded folder or file on disk

	    Raises:
	        Exception: propagated from ``http_get`` when the download fails.
	    """
	    cache_dest_path = path_in_cache(save_path)
	    os.makedirs(os.path.dirname(cache_dest_path), exist_ok=True)
	    # Use a lock to prevent concurrent downloads. The context manager
	    # guarantees the lock is released even when the download fails.
	    with filelock.FileLock(cache_dest_path + ".lock"):
	        # Check if already downloaded.
	        if skip_if_cached and os.path.exists(cache_dest_path):
	            return cache_dest_path
	        # If the file isn't found yet, download the zip file to the cache.
	        downloaded_file = tempfile.NamedTemporaryFile(
	            dir=TEXTATTACK_CACHE_DIR, suffix=".zip", delete=False
	        )
	        try:
	            # Leaving this `with` closes the file so it can be reopened
	            # by name below.
	            with downloaded_file:
	                http_get(url, downloaded_file)
	            # Move or unzip the file.
	            if zipfile.is_zipfile(downloaded_file.name):
	                unzip_file(downloaded_file.name, cache_dest_path)
	            else:
	                logger.info(f"Copying {downloaded_file.name} to {cache_dest_path}.")
	                shutil.copyfile(downloaded_file.name, cache_dest_path)
	        finally:
	            # Remove the temporary file, including after a failed download,
	            # so partial downloads do not pile up in the cache directory.
	            os.remove(downloaded_file.name)
	    logger.info(f"Successfully saved {url} to cache.")
	    return cache_dest_path


	def unzip_file(path_to_zip_file, unzipped_folder_path):
	    """Extract a .zip archive next to ``unzipped_folder_path``.

	    The archive is extracted into the *parent* directory of
	    ``unzipped_folder_path``, not into that path itself.

	    Args:
	        path_to_zip_file (str): location of the archive to extract.
	        unzipped_folder_path (str): path whose parent directory receives
	            the archive contents.
	    """
	    logger.info(f"Unzipping file {path_to_zip_file} to {unzipped_folder_path}.")
	    destination_parent = pathlib.Path(unzipped_folder_path).parent
	    archive = zipfile.ZipFile(path_to_zip_file, "r")
	    with archive:
	        archive.extractall(destination_parent)


	def http_get(url, out_file, proxies=None):
	    """Get contents of a URL and save to a file.

	    https://github.com/huggingface/transformers/blob/master/src/transformers/file_utils.py

	    Args:
	        url (str): URL to download.
	        out_file: writable binary file object that receives the response body.
	        proxies (dict, optional): passed through to ``requests.get``.

	    Raises:
	        Exception: if the server answers with status 403 or 404.
	        requests.HTTPError: for any other 4xx/5xx status, so that an error
	            page is never written to ``out_file`` as if it were the payload.
	    """
	    logger.info(f"Downloading {url}.")
	    # The context manager closes the streamed connection on every exit path.
	    with requests.get(url, stream=True, proxies=proxies) as req:
	        if req.status_code in (403, 404):
	            raise Exception(f"Could not reach {url}.")
	        # Reject the remaining error statuses before writing anything.
	        req.raise_for_status()
	        content_length = req.headers.get("Content-Length")
	        total = int(content_length) if content_length is not None else None
	        with tqdm.tqdm(unit="B", unit_scale=True, total=total) as progress:
	            for chunk in req.iter_content(chunk_size=1024):
	                if chunk:  # filter out keep-alive new chunks
	                    progress.update(len(chunk))
	                    out_file.write(chunk)


	# Colorize the log prefix only when stdout is an interactive terminal.
	if sys.stdout.isatty():
	    LOG_STRING = "\033[34;1mtextattack\033[0m"
	else:
	    LOG_STRING = "textattack"
	logger = logging.getLogger(__name__)
	# `dictConfig` defaults `disable_existing_loggers` to True, which would
	# disable every other non-root logger already created in the process just
	# because this module was imported. Only this module's logger level is
	# meant to be configured here, so opt out explicitly.
	logging.config.dictConfig(
	    {
	        "version": 1,
	        "disable_existing_loggers": False,
	        "loggers": {__name__: {"level": logging.INFO}},
	    }
	)
	formatter = logging.Formatter(f"{LOG_STRING}: %(message)s")
	stream_handler = logging.StreamHandler()
	stream_handler.setFormatter(formatter)
	logger.addHandler(stream_handler)
	# Messages are emitted by the handler above only, not by ancestor loggers.
	logger.propagate = False


	def _post_install():
	    """Download the NLTK packages and, when available, stanza's English data.

	    The NLTK downloads are mandatory. The stanza step is best-effort: a
	    missing stanza installation is ignored, and any other failure is logged
	    as a warning instead of being raised.
	    """
	    logger.info("Updating TextAttack package dependencies.")
	    logger.info("Downloading NLTK required packages.")
	    import nltk

	    nltk_packages = (
	        "averaged_perceptron_tagger",
	        "stopwords",
	        "omw",
	        "universal_tagset",
	        "wordnet",
	        "punkt",
	    )
	    for package in nltk_packages:
	        nltk.download(package)

	    try:
	        import stanza

	        stanza.download("en")
	    except ImportError:
	        # stanza is treated as optional here; skip quietly when absent.
	        pass
	    except Exception as err:
	        # Keep the step best-effort, but do not hide why it failed.
	        logger.warning(f"Could not download stanza English resources: {err}")


	def set_cache_dir(cache_dir):
	    """Point the cache-related environment variables at ``cache_dir``.

	    Args:
	        cache_dir (str): value assigned to each environment variable below.
	    """
	    cache_env_vars = (
	        "TFHUB_CACHE_DIR",  # Tensorflow Hub cache directory
	        "PYTORCH_TRANSFORMERS_CACHE",  # HuggingFace `transformers` cache directory
	        "HF_HOME",  # HuggingFace `datasets` cache directory
	        "XDG_CACHE_HOME",  # Basic directory for Linux user-specific non-data files
	    )
	    for env_var in cache_env_vars:
	        os.environ[env_var] = cache_dir


	def _post_install_if_needed():
	    """Runs _post_install if hasn't been run since install.

	    Completion is recorded by an empty marker file in the cache directory.
	    A file lock next to the marker serializes concurrent callers. The lock
	    is released even if ``_post_install`` raises, and the marker is only
	    written after ``_post_install`` returns.
	    """
	    # Check for post-install file.
	    post_install_file_path = path_in_cache("post_install_check_3")
	    with filelock.FileLock(post_install_file_path + ".lock"):
	        if os.path.exists(post_install_file_path):
	            return
	        # Run post-install.
	        _post_install()
	        # Create file that indicates post-install completed.
	        with open(post_install_file_path, "w"):
	            pass


	# Resolve the cache directory: an explicit TA_CACHE_DIR wins and is also
	# applied to the other cache environment variables via `set_cache_dir`;
	# otherwise fall back to the per-user default location.
	if "TA_CACHE_DIR" in os.environ:
	    TEXTATTACK_CACHE_DIR = os.environ["TA_CACHE_DIR"]
	    set_cache_dir(TEXTATTACK_CACHE_DIR)
	else:
	    TEXTATTACK_CACHE_DIR = os.path.expanduser("~/.cache/textattack")


	# Runs at import time; the cache directory must be resolved before this call.
	_post_install_if_needed()