# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from fairseq import utils
from fairseq.models import (
    FairseqLanguageModel,
    register_model,
    register_model_architecture,
)
from fairseq.models.fconv import FConvDecoder
from fairseq.utils import safe_hasattr


@register_model("fconv_lm")
class FConvLanguageModel(FairseqLanguageModel):
    def __init__(self, decoder):
        super().__init__(decoder)

    @staticmethod
    def add_args(parser):
        """Add model-specific arguments to the parser."""
        parser.add_argument(
            "--dropout", type=float, metavar="D", help="dropout probability"
        )
        parser.add_argument(
            "--decoder-embed-dim",
            type=int,
            metavar="N",
            help="decoder embedding dimension",
        )
        parser.add_argument(
            "--decoder-layers",
            type=str,
            metavar="EXPR",
            help="decoder layers [(dim, kernel_size), ...]",
        )
        parser.add_argument(
            "--decoder-out-embed-dim",
            type=int,
            metavar="N",
            help="decoder output embedding dimension",
        )
        parser.add_argument(
            "--adaptive-softmax-cutoff",
            metavar="EXPR",
            help="comma-separated list of adaptive softmax cutoff points. "
            "Must be used with adaptive_loss criterion",
        )
        parser.add_argument(
            "--adaptive-softmax-dropout",
            type=float,
            metavar="D",
            help="sets adaptive softmax dropout for the tail projections",
        )
        parser.add_argument(
            "--decoder-attention",
            type=str,
            metavar="EXPR",
            help="decoder attention [True, ...]",
        )

    @classmethod
    def build_model(cls, args, task):
        """Build a new model instance."""
        # make sure all arguments are present in older models
        base_lm_architecture(args)

        if safe_hasattr(args, "max_target_positions") and not safe_hasattr(
            args, "tokens_per_sample"
        ):
            args.tokens_per_sample = args.max_target_positions

        decoder = FConvDecoder(
            dictionary=task.target_dictionary,
            embed_dim=args.decoder_embed_dim,
            convolutions=eval(args.decoder_layers),
            out_embed_dim=args.decoder_embed_dim,
            attention=eval(args.decoder_attention),
            dropout=args.dropout,
            max_positions=args.tokens_per_sample,
            share_embed=False,
            positional_embeddings=False,
            adaptive_softmax_cutoff=(
                utils.eval_str_list(args.adaptive_softmax_cutoff, type=int)
                if args.criterion == "adaptive_loss"
                else None
            ),
            adaptive_softmax_dropout=args.adaptive_softmax_dropout,
        )
        return FConvLanguageModel(decoder)
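

# A hedged usage sketch, not part of the original file: once this module is
# registered, fairseq's CLI can select the model by the names declared below.
# The dataset path and companion flags here are assumptions for illustration;
# only the --arch names come from this file.
#
#   fairseq-train data-bin/wikitext-103 \
#       --task language_modeling \
#       --arch fconv_lm_dauphin_wikitext103 \
#       --criterion adaptive_loss \
#       --adaptive-softmax-cutoff 10000,20000,200000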


@register_model_architecture("fconv_lm", "fconv_lm")
def base_lm_architecture(args):
    args.dropout = getattr(args, "dropout", 0.1)
    args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 128)
    args.decoder_layers = getattr(args, "decoder_layers", "[(1268, 4)] * 13")
    args.decoder_attention = getattr(args, "decoder_attention", "False")
    args.adaptive_softmax_cutoff = getattr(args, "adaptive_softmax_cutoff", None)
    args.adaptive_softmax_dropout = getattr(args, "adaptive_softmax_dropout", 0)
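
# Note: the decoder_layers and decoder_attention defaults are strings because
# build_model() above passes them through eval(); e.g. eval("[(1268, 4)] * 13")
# expands to 13 identical (out_channels, kernel_size) blocks. A runnable
# sketch of this expansion is at the bottom of the file.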


@register_model_architecture("fconv_lm", "fconv_lm_dauphin_wikitext103")
def fconv_lm_dauphin_wikitext103(args):
    layers = "[(850, 6)] * 3"
    layers += " + [(850, 1)] * 1"
    layers += " + [(850, 5)] * 4"
    layers += " + [(850, 1)] * 1"
    layers += " + [(850, 4)] * 3"
    layers += " + [(1024, 4)] * 1"
    layers += " + [(2048, 4)] * 1"
    args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 280)
    args.decoder_layers = getattr(args, "decoder_layers", layers)
    args.decoder_attention = getattr(args, "decoder_attention", "False")
    args.adaptive_softmax_cutoff = getattr(
        args, "adaptive_softmax_cutoff", "10000,20000,200000"
    )
    base_lm_architecture(args)
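
# The cutoff string "10000,20000,200000" is parsed by utils.eval_str_list in
# build_model(), and only takes effect under --criterion adaptive_loss; it
# partitions the output vocabulary into a head and three frequency-ordered
# tail clusters for the adaptive softmax.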


@register_model_architecture("fconv_lm", "fconv_lm_dauphin_gbw")
def fconv_lm_dauphin_gbw(args):
    layers = "[(512, 5)]"
    layers += " + [(128, 1, 0), (128, 5, 0), (512, 1, 3)] * 3"
    layers += " + [(512, 1, 0), (512, 5, 0), (1024, 1, 3)] * 3"
    layers += " + [(1024, 1, 0), (1024, 5, 0), (2048, 1, 3)] * 6"
    layers += " + [(1024, 1, 0), (1024, 5, 0), (4096, 1, 3)]"
    args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 128)
    args.decoder_layers = getattr(args, "decoder_layers", layers)
    args.decoder_attention = getattr(args, "decoder_attention", "False")
    args.adaptive_softmax_cutoff = getattr(
        args, "adaptive_softmax_cutoff", "10000,50000,200000"
    )
    base_lm_architecture(args)
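

# ---------------------------------------------------------------------------
# Hedged, self-contained sketch (not part of the upstream module): it only
# demonstrates how the architecture strings above expand into layer specs
# using plain Python, so it runs standalone without fairseq installed.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # "[(1268, 4)] * 13" -> 13 conv blocks of width 1268 with kernel size 4.
    base_layers = eval("[(1268, 4)] * 13")
    assert len(base_layers) == 13 and base_layers[0] == (1268, 4)

    # Three-element specs, as in fconv_lm_dauphin_gbw, carry a third value
    # that (by assumption here) controls the residual connection: 2-tuples
    # get a default, and 0 disables the residual for that block.
    gbw_layers = eval(
        "[(512, 5)]"
        " + [(128, 1, 0), (128, 5, 0), (512, 1, 3)] * 3"
        " + [(512, 1, 0), (512, 5, 0), (1024, 1, 3)] * 3"
        " + [(1024, 1, 0), (1024, 5, 0), (2048, 1, 3)] * 6"
        " + [(1024, 1, 0), (1024, 5, 0), (4096, 1, 3)]"
    )
    print(f"GBW decoder: {len(gbw_layers)} blocks, last spec {gbw_layers[-1]}")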