Spaces:
Runtime error
Runtime error
| # Copyright (c) 2015-present, Facebook, Inc. | |
| # All rights reserved. | |
| """ | |
| Misc functions, including distributed helpers. | |
| Mostly copy-paste from torchvision references. | |
| """ | |
| import io | |
| import os | |
| import time | |
| from collections import defaultdict, deque | |
| import datetime | |
| import torch | |
| import torch.distributed as dist | |
| import logging | |
# Registry of logger names that `get_logger` has already configured; a name
# found here is returned as-is instead of having handlers attached again.
logger_initialized = {}
def group_subnets_by_flops(data, flops_gap=1.0):
    """Bucket config ids into groups of similar FLOPs.

    Entries are visited in ascending order of their FLOPs value, which is
    divided by 1e9 before comparison. A new group is opened whenever an entry
    differs from the current anchor by more than ``flops_gap``; the anchor
    starts at 0 and is only moved when a new group is opened.

    Args:
        data (dict): Maps a config id (anything ``int()`` accepts) to its
            FLOPs value.
        flops_gap (float): Maximum distance (after the 1e9 scaling) from the
            anchor for an entry to stay in the current group.

    Returns:
        list[list[int]]: Groups in ascending FLOPs order, each one a sorted
        list of integer config ids.
    """
    groups = []
    current = []
    anchor = 0
    for key, raw_flops in sorted(data.items(), key=lambda pair: pair[1]):
        scaled = raw_flops / 1e9
        if not abs(anchor - scaled) > flops_gap:
            # Still close enough to the anchor: extend the open group.
            current.append(int(key))
            continue
        # Too far from the anchor: close the open group and start a new one.
        if current:
            groups.append(sorted(current))
        current = [int(key)]
        anchor = scaled
    if current:
        groups.append(sorted(current))
    return groups
def find_best_candidates(data, flops_gap=1):
    """Pick the best-scoring config from each cluster of similar FLOPs.

    Entries are visited in ascending ``(flops, score)`` order. An entry opens
    a new cluster when it is more than ``flops_gap`` away from the FLOPs of
    the entry that opened the current cluster; otherwise it replaces the
    cluster's representative if its score is strictly higher.

    Args:
        data (dict): Maps a config id to a ``(flops, score)`` pair.
        flops_gap (float): Maximum FLOPs distance for an entry to belong to
            the current cluster. Defaults to 1, the previously hard-coded
            value.

    Returns:
        list: One config id (the original dict key) per cluster, in ascending
        FLOPs order of the clusters.
    """
    candidate_idx = []
    last_flops = 0
    for cfg_id, (flops, score) in sorted(data.items(), key=lambda item: item[1]):
        # The very first entry always opens a cluster. Without this guard an
        # entry whose FLOPs is within `flops_gap` of the initial anchor 0 fell
        # through to `candidate_idx[-1]` on an empty list and raised
        # IndexError.
        if not candidate_idx or abs(last_flops - flops) > flops_gap:
            candidate_idx.append(cfg_id)
            last_flops = flops
        elif score > data[candidate_idx[-1]][1]:
            candidate_idx[-1] = cfg_id
    return candidate_idx
def find_top_candidates(data, ratio=0.9, flops_gap=3):
    """Keep the top-scoring fraction of configs inside each FLOPs cluster.

    Entries are visited in ascending ``(flops, score)`` order and clustered:
    an entry opens a new cluster when it is more than ``flops_gap`` away from
    the current anchor. The anchor starts at 0 and is moved only when a new
    cluster is opened. Within every cluster of more than one entry, the
    ``int(ratio * size)`` highest-scoring entries (at least one) are kept.

    Args:
        data (dict): Maps a config id (anything ``int()`` accepts) to a
            ``(flops, score)`` pair.
        ratio (float): Fraction of each cluster to keep.
        flops_gap (float): Maximum FLOPs distance from the anchor for an entry
            to stay in the current cluster. Defaults to 3, the previously
            hard-coded value.

    Returns:
        list[int]: Selected config ids, cluster by cluster in ascending FLOPs
        order; inside a cluster, ids are ordered by descending score.
    """
    sorted_data = dict(sorted(data.items(), key=lambda item: item[1]))

    grouped_cands = []
    candidate_idx = []
    last_flops = 0
    for cfg_id, (flops, _score) in sorted_data.items():
        if abs(last_flops - flops) > flops_gap:
            if candidate_idx:
                grouped_cands.append(candidate_idx)
            candidate_idx = [cfg_id]
            last_flops = flops
        else:
            candidate_idx.append(cfg_id)
    if candidate_idx:
        grouped_cands.append(candidate_idx)

    final_list = []
    for group in grouped_cands:
        if len(group) == 1:
            # Nothing to rank in a singleton cluster.
            final_list.append(int(group[0]))
            continue
        scores = torch.tensor([sorted_data[cfg_id][-1] for cfg_id in group])
        # NOTE: argsort is called without `stable=True`, so the relative order
        # of entries with equal scores is whatever torch returns.
        ranking = torch.argsort(scores, descending=True)
        # Always keep at least one entry, even for very small ratios.
        num_selected = max(1, int(ratio * len(group)))
        final_list.extend(int(group[idx]) for idx in ranking[:num_selected].tolist())
    return final_list
def get_logger(name, log_file=None, log_level=logging.INFO, file_mode='w'):
    """Initialize and get a logger by name.

    If the logger has not been initialized, this method will initialize the
    logger by adding one or two handlers, otherwise the initialized logger will
    be directly returned. During initialization, a StreamHandler will always be
    added. If `log_file` is specified and the process rank is 0, a FileHandler
    will also be added.

    A logger whose name is a dotted child of an already initialized logger
    (e.g. "a.b" when "a" is initialized) is returned without adding handlers.

    Args:
        name (str): Logger name.
        log_file (str | None): The log filename. If specified, a FileHandler
            will be added to the logger.
        log_level (int): The logger level. Note that only the process of
            rank 0 is affected, and other processes will set the level to
            "Error" thus be silent most of the time.
        file_mode (str): The file mode used in opening log file.
            Defaults to 'w'.

    Returns:
        logging.Logger: The expected logger.
    """
    logger = logging.getLogger(name)
    if name in logger_initialized:
        return logger
    # Handle hierarchical names: if logger "a" is initialized, logger "a.b"
    # skips initialization since it is a child of "a". The prefix must end at
    # a dot; a bare `startswith` would also match unrelated names such as
    # "ab" and hand back a logger with no handlers attached.
    for logger_name in logger_initialized:
        if name.startswith(logger_name + '.'):
            return logger

    handlers = [logging.StreamHandler()]

    if dist.is_available() and dist.is_initialized():
        rank = dist.get_rank()
    else:
        rank = 0

    # Only rank 0 will add a FileHandler.
    if rank == 0 and log_file is not None:
        # logging.FileHandler defaults to mode 'a'; `file_mode` is exposed so
        # callers can choose between truncating ('w') and appending ('a').
        handlers.append(logging.FileHandler(log_file, file_mode))

    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    for handler in handlers:
        handler.setFormatter(formatter)
        handler.setLevel(log_level)
        logger.addHandler(handler)

    # Non-zero ranks are limited to ERROR so they stay quiet most of the time.
    if rank == 0:
        logger.setLevel(log_level)
    else:
        logger.setLevel(logging.ERROR)

    logger_initialized[name] = True
    return logger
def get_root_logger(log_file=None, log_level=logging.INFO):
    """Return the project-wide logger, which is registered under 'snnet'.

    This simply delegates to `get_logger` with the fixed name 'snnet', so the
    logger is set up on the first call and reused afterwards.

    Args:
        log_file (str | None): The log filename, forwarded to `get_logger`.
        log_level (int): The logger level, forwarded to `get_logger`.

    Returns:
        logging.Logger: The logger named 'snnet'.
    """
    return get_logger(name='snnet', log_file=log_file, log_level=log_level)
class SmoothedValue(object):
    """Track a series of values and provide access to smoothed values over a
    window or the global series average.

    The last ``window_size`` values are kept in a deque and back the
    ``median``, ``avg``, ``max`` and ``value`` statistics; ``global_avg`` uses
    the running ``total`` / ``count`` over every update.

    The statistics are exposed as read-only properties because the rest of
    this file consumes them as attributes: `__str__` formats them with numeric
    format specs, and `MetricLogger.log_every` multiplies ``global_avg`` by an
    integer. As plain methods both uses raise TypeError.
    """

    def __init__(self, window_size=20, fmt=None):
        if fmt is None:
            fmt = "{median:.4f} ({global_avg:.4f})"
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0
        self.fmt = fmt

    def update(self, value, n=1):
        """Record `value`, counting it `n` times in the global total."""
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """
        Warning: does not synchronize the deque!

        Only `count` and `total` are all-reduced across processes; this is a
        no-op when torch.distributed is not available/initialized.
        """
        if not is_dist_avail_and_initialized():
            return
        t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
        dist.barrier()
        dist.all_reduce(t)
        t = t.tolist()
        self.count = int(t[0])
        self.total = t[1]

    @property
    def median(self):
        """Median of the values currently in the window."""
        d = torch.tensor(list(self.deque))
        return d.median().item()

    @property
    def avg(self):
        """Mean of the values currently in the window."""
        d = torch.tensor(list(self.deque), dtype=torch.float32)
        return d.mean().item()

    @property
    def global_avg(self):
        """Average over all updates: `total / count`."""
        return self.total / self.count

    @property
    def max(self):
        """Largest value currently in the window."""
        return max(self.deque)

    @property
    def value(self):
        """Most recently recorded value."""
        return self.deque[-1]

    def __str__(self):
        return self.fmt.format(
            median=self.median,
            avg=self.avg,
            global_avg=self.global_avg,
            max=self.max,
            value=self.value)
class MetricLogger(object):
    """Collect named `SmoothedValue` meters and report them while iterating.

    Args:
        delimiter (str): String placed between the fields of a log line.
        logger: Object with an ``info(msg)`` method used for output. When
            None, messages are written with ``print`` instead.
    """

    def __init__(self, delimiter="\t", logger=None):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter
        self.logger = logger

    def update(self, **kwargs):
        """Record one value per keyword; tensors are converted via `.item()`."""
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        # Only called when normal attribute lookup fails. `meters` is read
        # through __dict__ so that a lookup on an instance whose `meters` is
        # not set yet raises AttributeError instead of recursing forever.
        meters = self.__dict__.get('meters')
        if meters is not None and attr in meters:
            return meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        loss_str = []
        for name, meter in self.meters.items():
            loss_str.append(
                "{}: {}".format(name, str(meter))
            )
        return self.delimiter.join(loss_str)

    def synchronize_between_processes(self):
        """Call `synchronize_between_processes` on every meter."""
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        """Register `meter` under `name`, replacing any existing one."""
        self.meters[name] = meter

    def _emit(self, message):
        """Send `message` to the configured logger, or print it if none is set."""
        if self.logger is None:
            print(message)
        else:
            self.logger.info(message)

    def log_every(self, iterable, print_freq, header=None):
        """Yield the items of `iterable`, logging progress along the way.

        A line with the iteration index, ETA, all meters, iteration time and
        data-loading time (plus peak CUDA memory in MB when CUDA is available)
        is emitted every `print_freq` iterations and on the last one. A total
        time summary is emitted once the iterable is exhausted.

        Args:
            iterable: Sized iterable (``len()`` is required).
            print_freq (int): Emit a progress line every this many iterations.
            header (str | None): Prefix for every emitted line.
        """
        i = 0
        if not header:
            header = ''
        num_iters = len(iterable)
        use_cuda = torch.cuda.is_available()
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        # Pad the iteration index to the width of the total count.
        space_fmt = ':' + str(len(str(num_iters))) + 'd'
        log_msg = [
            header,
            '[{0' + space_fmt + '}/{1}]',
            'eta: {eta}',
            '{meters}',
            'time: {time}',
            'data: {data}'
        ]
        if use_cuda:
            log_msg.append('max mem: {memory:.0f}')
        log_msg = self.delimiter.join(log_msg)
        MB = 1024.0 * 1024.0
        for obj in iterable:
            # Time spent waiting for the next item.
            data_time.update(time.time() - end)
            yield obj
            # Full iteration time, including the consumer's work on `obj`.
            iter_time.update(time.time() - end)
            if i % print_freq == 0 or i == num_iters - 1:
                eta_seconds = iter_time.global_avg * (num_iters - i)
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
                fields = dict(
                    eta=eta_string,
                    meters=str(self),
                    time=str(iter_time),
                    data=str(data_time))
                if use_cuda:
                    fields['memory'] = torch.cuda.max_memory_allocated() / MB
                self._emit(log_msg.format(i, num_iters, **fields))
            i += 1
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        # max(..., 1) keeps an empty iterable from raising ZeroDivisionError.
        self._emit('{} Total time: {} ({:.4f} s / it)'.format(
            header, total_time_str, total_time / max(num_iters, 1)))
| def _load_checkpoint_for_ema(model_ema, checkpoint): | |
| """ | |
| Workaround for ModelEma._load_checkpoint to accept an already-loaded object | |
| """ | |
| mem_file = io.BytesIO() | |
| torch.save({'state_dict_ema':checkpoint}, mem_file) | |
| mem_file.seek(0) | |
| model_ema._load_checkpoint(mem_file) | |
def setup_for_distributed(is_master):
    """Replace the builtin `print` with a version gated on `is_master`.

    After this call, `print(...)` only produces output when `is_master` is
    truthy or when the call passes ``force=True``. The ``force`` keyword is
    consumed here and never reaches the wrapped print.
    """
    import builtins

    wrapped_print = builtins.print

    # Deliberately named `print`: this function is installed as the builtin.
    def print(*args, **kwargs):
        forced = kwargs.pop('force', False)
        if not (is_master or forced):
            return
        wrapped_print(*args, **kwargs)

    builtins.print = print
def is_dist_avail_and_initialized():
    """Return True only if torch.distributed is both available and initialized."""
    if dist.is_available() and dist.is_initialized():
        return True
    return False
def get_world_size():
    """Return the distributed world size, or 1 when not running distributed."""
    return dist.get_world_size() if is_dist_avail_and_initialized() else 1
def get_rank():
    """Return this process's distributed rank, or 0 when not running distributed."""
    return dist.get_rank() if is_dist_avail_and_initialized() else 0
def is_main_process():
    """Return True when `get_rank()` reports rank 0."""
    rank = get_rank()
    return rank == 0
def save_on_master(*args, **kwargs):
    """Call `torch.save` with the given arguments, but only on the main process."""
    if not is_main_process():
        return
    torch.save(*args, **kwargs)
def init_distributed_mode(args):
    """Configure torch.distributed from environment variables.

    Fills in fields on `args` and initializes the process group:

    * ``RANK`` and ``WORLD_SIZE`` set: `rank`, `world_size` and `gpu` are
      read from ``RANK``, ``WORLD_SIZE`` and ``LOCAL_RANK``.
    * otherwise ``SLURM_PROCID`` set: `rank` comes from it and `gpu` is the
      rank modulo the CUDA device count. `world_size` is NOT set in this
      branch, so `args.world_size` must already exist.
    * neither: sets ``args.distributed = False`` and returns.

    In the distributed case `args.dist_url` must already be set; the backend
    is fixed to 'nccl', and printing is silenced on non-zero ranks via
    `setup_for_distributed`.
    """
    env = os.environ
    launched_with_rank = 'RANK' in env and 'WORLD_SIZE' in env

    if not launched_with_rank and 'SLURM_PROCID' not in env:
        print('Not using distributed mode')
        args.distributed = False
        return

    if launched_with_rank:
        args.rank = int(env["RANK"])
        args.world_size = int(env['WORLD_SIZE'])
        args.gpu = int(env['LOCAL_RANK'])
    else:
        args.rank = int(env['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()

    args.distributed = True
    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print('| distributed init (rank {}): {}'.format(
        args.rank, args.dist_url), flush=True)
    torch.distributed.init_process_group(
        backend=args.dist_backend,
        init_method=args.dist_url,
        world_size=args.world_size,
        rank=args.rank,
    )
    # Wait for every process before changing print behaviour.
    torch.distributed.barrier()
    setup_for_distributed(args.rank == 0)
| import json | |
def save_on_master_eval_res(log_stats, output_dir):
    """Append `log_stats` as one JSON line to a file, on the main process only.

    Args:
        log_stats: JSON-serializable object to record.
        output_dir (str): Despite the name, this is passed directly to
            `open`, so it is the path of the file to append to.
    """
    if not is_main_process():
        return
    with open(output_dir, 'a') as out_file:
        line = json.dumps(log_stats)
        out_file.write(line + "\n")