Spaces:

onnx
/

export

Running on CPU Upgrade

App Files Files Community

Felix Marty committed on Nov 3, 2022

Commit

be527a9

1 Parent(s): f75daf5

working version?

Browse files

Files changed (1) hide show

onnx_export.py +58 -54

onnx_export.py CHANGED Viewed

@@ -4,9 +4,7 @@ from optimum.exporters.onnx import OnnxConfigWithPast, export, validate_model_ou
 from tempfile import TemporaryDirectory
-from transformers import AutoConfig, is_torch_available
-from transformers import AutoConfig
 from pathlib import Path
@@ -29,55 +27,54 @@ def previous_pr(api: "HfApi", model_id: str, pr_title: str) -> Optional["Discuss
             return discussion
 def convert_onnx(model_id: str, task: str, folder: str):
-        model_class = TasksManager.get_model_class_for_task(task)
-        config = AutoConfig.from_pretrained(model_id)
-        model = model_class.from_config(config)
-        device = "cpu"  # ?
-        # Dynamic axes aren't supported for YOLO-like models. This means they cannot be exported to ONNX on CUDA devices.
-        # See: https://github.com/ultralytics/yolov5/pull/8378
-        if model.__class__.__name__.startswith("Yolos") and device != "cpu":
-            return
-        onnx_config_class_constructor = TasksManager.get_exporter_config_constructor(model_type=config.model_type, exporter="onnx", task=task, model_name=model_id)
-        onnx_config = onnx_config_class_constructor(model.config)
-        # We need to set this to some value to be able to test the outputs values for batch size > 1.
-        if (
-            isinstance(onnx_config, OnnxConfigWithPast)
-            and getattr(model.config, "pad_token_id", None) is None
-            and task == "sequence-classification"
-        ):
-            model.config.pad_token_id = 0
-            if is_torch_available():
-                from optimum.exporters.onnx.utils import TORCH_VERSION
-                if not onnx_config.is_torch_support_available:
-                    print(
-                        "Skipping due to incompatible PyTorch version. Minimum required is"
-                        f" {onnx_config.MIN_TORCH_VERSION}, got: {TORCH_VERSION}"
-                    )
-                onnx_inputs, onnx_outputs = export(
-                    model, onnx_config, onnx_config.DEFAULT_ONNX_OPSET, Path(folder), device=device
-                )
-                atol = onnx_config.ATOL_FOR_VALIDATION
-                if isinstance(atol, dict):
-                    atol = atol[task.replace("-with-past", "")]
-                validate_model_outputs(
-                    onnx_config,
-                    model,
-                    Path(folder),
-                    onnx_outputs,
-                    atol,
-                )
-            # TODO: iterate in folder and add all
-            operations = [CommitOperationAdd(path_in_repo=local.split("/")[-1], path_or_fileobj=local) for local in local_filenames]
-            return operations
 def convert(api: "HfApi", model_id: str, task:str, force: bool=False) -> Optional["CommitInfo"]:
@@ -98,7 +95,14 @@ def convert(api: "HfApi", model_id: str, task:str, force: bool=False) -> Optiona
                 new_pr = pr
                 raise Exception(f"Model {model_id} already has an open PR check out {url}")
             else:
-                convert_onnx(model_id, task, folder)
         finally:
             shutil.rmtree(folder)
         return new_pr
@@ -113,12 +117,12 @@ if __name__ == "__main__":
     """
     parser = argparse.ArgumentParser(description=DESCRIPTION)
     parser.add_argument(
-        "model_id",
         type=str,
         help="The name of the model on the hub to convert. E.g. `gpt2` or `facebook/wav2vec2-base-960h`",
     )
     parser.add_argument(
-        "task",
         type=str,
         help="The task the model is performing",
     )

 from tempfile import TemporaryDirectory
+from transformers import AutoConfig, AutoTokenizer, is_torch_available
 from pathlib import Path
             return discussion
 def convert_onnx(model_id: str, task: str, folder: str) -> list:
     """Export ``model_id`` to ONNX inside ``folder`` and list the files to upload.

     The model is loaded through ``TasksManager`` for ``task``, exported to
     ``<folder>/model.onnx`` with the ONNX config's default opset, and the
     exported file is then checked with ``validate_model_outputs``.

     Args:
         model_id: Model identifier handed to ``TasksManager`` and ``AutoTokenizer``.
         task: Task name used to select the model class and the ONNX config.
         folder: Directory that receives the exported file(s).

     Returns:
         One ``CommitOperationAdd`` per entry found in ``folder`` after the
         export, in sorted file-name order.

     Raises:
         ValueError: If a pad token id is required for the export but cannot
             be inferred from the model's tokenizer.
     """
     # Allocate the model
     model = TasksManager.get_model_from_task(task, model_id, framework="pt")
     model_type = model.config.model_type.replace("_", "-")
     model_name = getattr(model, "name", None)
     onnx_config_constructor = TasksManager.get_exporter_config_constructor(
         model_type, "onnx", task=task, model_name=model_name
     )
     onnx_config = onnx_config_constructor(model.config)
     # Task names are hyphenated (see the "-with-past" suffix handled below), so the
     # underscore spelling alone never matched; normalise and accept both spellings.
     needs_pad_token_id = (
         isinstance(onnx_config, OnnxConfigWithPast)
         and getattr(model.config, "pad_token_id", None) is None
         and task.replace("_", "-") == "sequence-classification"
     )
     if needs_pad_token_id:
         pad_error = "Could not infer the pad token id, which is needed in this case, please provide it with the --pad_token_id argument"
         try:
             pad_token_id = AutoTokenizer.from_pretrained(model_id).pad_token_id
         except Exception as err:
             # Keep the underlying failure attached so the real cause stays visible.
             raise ValueError(pad_error) from err
         if pad_token_id is None:
             # A tokenizer without a pad token would leave the config unchanged (still None).
             raise ValueError(pad_error)
         model.config.pad_token_id = pad_token_id
     # Export with the default opset declared by the ONNX config.
     output = Path(folder).joinpath("model.onnx")
     _, onnx_outputs = export(
         model,
         onnx_config,
         onnx_config.DEFAULT_ONNX_OPSET,
         output,
     )
     atol = onnx_config.ATOL_FOR_VALIDATION
     if isinstance(atol, dict):
         # Per-task tolerances are keyed by the task name without the "-with-past" suffix.
         atol = atol[task.replace("-with-past", "")]
     validate_model_outputs(onnx_config, model, output, onnx_outputs, atol)
     print(f"All good, model saved at: {output}")
     # Sorted so the resulting commit operations are in a deterministic order.
     return [
         CommitOperationAdd(path_in_repo=file_name, path_or_fileobj=os.path.join(folder, file_name))
         for file_name in sorted(os.listdir(folder))
     ]
 def convert(api: "HfApi", model_id: str, task:str, force: bool=False) -> Optional["CommitInfo"]:
                 new_pr = pr
                 raise Exception(f"Model {model_id} already has an open PR check out {url}")
             else:
+                operations = convert_onnx(model_id, task, folder)
+                new_pr = api.create_commit(
+                    repo_id=model_id,
+                    operations=operations,
+                    commit_message=pr_title,
+                    create_pr=True,
+                )
         finally:
             shutil.rmtree(folder)
         return new_pr
     """
     parser = argparse.ArgumentParser(description=DESCRIPTION)
     parser.add_argument(
+        "--model_id",
         type=str,
         help="The name of the model on the hub to convert. E.g. `gpt2` or `facebook/wav2vec2-base-960h`",
     )
     parser.add_argument(
+        "--task",
         type=str,
         help="The task the model is performing",
     )